diff --git a/build.sh b/build.sh
index b123d1c..a7c77bb 100644
--- a/build.sh
+++ b/build.sh
@@ -4,4 +4,4 @@
 rm -rf linghe.egg-info &&
 python setup.py develop &&
 python setup.py bdist_wheel
-# pdoc --output-dir docs -d google --no-include-undocumented --no-search --no-show-source linghe
\ No newline at end of file
+# pdoc --output-dir docs -d google --no-include-undocumented --no-show-source linghe
\ No newline at end of file
diff --git a/docs/linghe.html b/docs/linghe.html
index a7cef32..27dbb27 100644
--- a/docs/linghe.html
+++ b/docs/linghe.html
@@ -18,6 +18,8 @@ Submodules
@@ -48,5 +50,186 @@
diff --git a/docs/linghe/facade.html b/docs/linghe/facade.html
index e50704a..ad2c300 100644
--- a/docs/linghe/facade.html
+++ b/docs/linghe/facade.html
@@ -23,6 +23,8 @@ linghe
 Submodules
@@ -57,5 +59,186 @@
diff --git a/docs/linghe/facade/add.html b/docs/linghe/facade/add.html
index 57a6c73..185a77c 100644
--- a/docs/linghe/facade/add.html
+++ b/docs/linghe/facade/add.html
@@ -23,6 +23,8 @@ linghe.facade
@@ -76,12 +78,193 @@

 Arguments:
 Returns:
-  return updated x tensor
+  updated x tensor
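The page above documents the in-place add facade. For orientation, a minimal sketch of how such an autograd Function is invoked, using the forward(ctx, x, y) signature recorded in the search index later in this diff; the tensor shapes, device, and the exact in-place semantics are assumptions, not statements from the page:

    import torch
    from linghe.facade.add import InplaceAddFunction

    x = torch.randn(1024, 4096, device="cuda")   # shapes and device are assumptions
    y = torch.randn(1024, 4096, device="cuda")
    out = InplaceAddFunction.apply(x, y)         # custom Functions are invoked via .apply()
    # per the Returns entry above, `out` is the updated x tensor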

diff --git a/docs/linghe/facade/fp32_gemm.html b/docs/linghe/facade/fp32_gemm.html
index b4858e4..86fe2a4 100644
--- a/docs/linghe/facade/fp32_gemm.html
+++ b/docs/linghe/facade/fp32_gemm.html
@@ -23,6 +23,8 @@ linghe.facade
@@ -84,5 +86,186 @@ Returns:
diff --git a/docs/linghe/facade/hadamard_quant_linear.html b/docs/linghe/facade/hadamard_quant_linear.html
index 530195c..a713023 100644
--- a/docs/linghe/facade/hadamard_quant_linear.html
+++ b/docs/linghe/facade/hadamard_quant_linear.html
@@ -23,6 +23,8 @@ linghe.facade
@@ -34,12 +36,6 @@

 API Documentation
   • HadamardQuantLinear
-    • forward
-    • extra_repr
@@ -76,41 +72,7 @@
-  Base class for all neural network modules.
-
-  Your models should also subclass this class.
-
-  Modules can also contain other Modules, allowing them to be nested in
-  a tree structure. You can assign the submodules as regular attributes::
-
-      import torch.nn as nn
-      import torch.nn.functional as F
-
-      class Model(nn.Module):
-          def __init__(self) -> None:
-              super().__init__()
-              self.conv1 = nn.Conv2d(1, 20, 5)
-              self.conv2 = nn.Conv2d(20, 20, 5)
-
-          def forward(self, x):
-              x = F.relu(self.conv1(x))
-              return F.relu(self.conv2(x))
-
-  Submodules assigned in this way will be registered, and will also have their
-  parameters converted when you call to(), etc.
-
-  As per the example above, an __init__() call to the parent class
-  must be made before assignment on the child.
-
-  :ivar training: Boolean represents whether this module is in training or
-      evaluation mode.
-  :vartype training: bool
+  a naive implementation of hadamard transformation and quantization
@@ -123,9 +85,7 @@
   a naive implementation of hadamard transformation and quantization
   Arguments:
     • in_features: in feature number
@@ -133,58 +93,193 @@ Arguments:
     • bias: whether use bias
     • device: weight device
    • dtype: weight dtype
    • impl: implementation of hadamard quantization
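For context, a hedged usage sketch of the class documented above. Only in_features, bias, device, dtype and impl are visible in this extract; the out_features argument and drop-in nn.Linear-style behaviour are assumptions:

    import torch
    from linghe.facade.hadamard_quant_linear import HadamardQuantLinear

    layer = HadamardQuantLinear(in_features=4096, out_features=4096,   # out_features assumed
                                bias=False, device="cuda", dtype=torch.bfloat16)
    x = torch.randn(8, 4096, device="cuda", dtype=torch.bfloat16)
    y = layer(x)   # forward(self, input: torch.Tensor) -> torch.Tensor, per the nav entries removed below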
-  def forward(self, input: torch.Tensor) -> torch.Tensor:
-
-      Define the computation performed at every call.
-
-      Should be overridden by all subclasses.
-
-      Although the recipe for forward pass needs to be defined within
-      this function, one should call the Module instance afterwards
-      instead of this since the former takes care of running the
-      registered hooks while the latter silently ignores them.
-
-  def extra_repr(self) -> str:
-
-      Return the extra representation of the module.
-
-      To print customized extra information, you should re-implement
-      this method in your own modules. Both single-line and multi-line
-      strings are acceptable.
diff --git a/docs/linghe/facade/loss.html b/docs/linghe/facade/loss.html
index 21cda4a..8b6d236 100644
--- a/docs/linghe/facade/loss.html
+++ b/docs/linghe/facade/loss.html
@@ -23,6 +23,8 @@ linghe.facade
@@ -77,12 +79,193 @@

 Arguments:
 Returns:
-  per token loss
+  a tensor of per token loss
diff --git a/docs/linghe/facade/norm.html b/docs/linghe/facade/norm.html
index e858009..9f264bf 100644
--- a/docs/linghe/facade/norm.html
+++ b/docs/linghe/facade/norm.html
@@ -23,6 +23,8 @@ linghe.facade
@@ -118,5 +120,186 @@ Returns:
diff --git a/docs/linghe/facade/rope.html b/docs/linghe/facade/rope.html
index 0d39eca..b9189f9 100644
--- a/docs/linghe/facade/rope.html
+++ b/docs/linghe/facade/rope.html
@@ -23,6 +23,8 @@ linghe.facade
@@ -81,14 +83,197 @@
 Arguments:
 Returns:
   qo: shape [B, S, H, head_dim]
   ko: shape [B, S, h, head_dim]
   vo: shape [B, S, h, head_dim]
diff --git a/docs/linghe/facade/smooth_quant_linear.html b/docs/linghe/facade/smooth_quant_linear.html
index 2752e47..05abcc0 100644
--- a/docs/linghe/facade/smooth_quant_linear.html
+++ b/docs/linghe/facade/smooth_quant_linear.html
@@ -23,22 +23,18 @@ linghe.facade

 API Documentation
 Returns:
   output tensor
@@ -218,5 +224,186 @@ Returns:
diff --git a/docs/linghe/quant.html b/docs/linghe/quant.html
index bd6573d..4264812 100644
--- a/docs/linghe/quant.html
+++ b/docs/linghe/quant.html
@@ -23,6 +23,8 @@ linghe
 Submodules
@@ -54,5 +56,186 @@
diff --git a/docs/linghe/quant/block.html b/docs/linghe/quant/block.html
index a562b18..957cbb2 100644
--- a/docs/linghe/quant/block.html
+++ b/docs/linghe/quant/block.html
@@ -23,6 +23,8 @@ linghe.quant
@@ -77,13 +79,196 @@

 Arguments:
 Returns:
   y: quantized tensor, float8_e4m3fn
   s: quantization scale, float32
diff --git a/docs/linghe/quant/channel.html b/docs/linghe/quant/channel.html
index 80243be..d4f506a 100644
--- a/docs/linghe/quant/channel.html
+++ b/docs/linghe/quant/channel.html
@@ -23,6 +23,8 @@ linghe.quant
@@ -140,13 +142,196 @@
 Arguments:
 Returns:
   x_q: quantized tensor
   x_scale: quantization scale
diff --git a/docs/linghe/quant/group.html b/docs/linghe/quant/group.html
index e0191d6..9d311e9 100644
--- a/docs/linghe/quant/group.html
+++ b/docs/linghe/quant/group.html
@@ -23,6 +23,8 @@ linghe.quant
@@ -77,13 +79,196 @@
 Arguments:
 Returns:
   y: quantized tensor, float8_e4m3fn
   s: quantization scale, float32
diff --git a/docs/linghe/quant/hadamard.html b/docs/linghe/quant/hadamard.html
index 917cf30..d956495 100644
--- a/docs/linghe/quant/hadamard.html
+++ b/docs/linghe/quant/hadamard.html
@@ -23,6 +23,8 @@ linghe.quant
@@ -76,15 +78,198 @@
 Arguments:
 Returns:
   x_q: rowwise quantized tensor of non-transposed x
   x_scale: rowwise quantization scale of non-transposed x
   xt_q: columnwise quantized tensor of transposed x
   xt_scale: columnwise quantization scale of transposed x
diff --git a/docs/linghe/quant/smooth.html b/docs/linghe/quant/smooth.html
index c903c18..5922806 100644
--- a/docs/linghe/quant/smooth.html
+++ b/docs/linghe/quant/smooth.html
@@ -23,6 +23,8 @@ linghe.quant
@@ -52,5 +54,186 @@
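The quantizers documented above all return the same pair: a float8_e4m3fn payload plus a float32 scale. As a point of reference only (this is generic absmax quantization in plain PyTorch, not the library's Triton kernels), a rowwise version can be written as:

    import torch

    def rowwise_fp8_quant_reference(x: torch.Tensor):
        # per-row absmax scaling, so that x ~= y.float() * s.unsqueeze(-1)
        fp8_max = torch.finfo(torch.float8_e4m3fn).max          # 448.0 for e4m3fn
        amax = x.abs().amax(dim=-1, keepdim=True).clamp_min(1e-12)
        s = (amax / fp8_max).float()                            # quantization scale, float32
        y = (x / s).clamp(-fp8_max, fp8_max).to(torch.float8_e4m3fn)  # quantized tensor
        return y, s.squeeze(-1)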

diff --git a/docs/linghe/utils.html b/docs/linghe/utils.html
index e37e3fe..afd4a60 100644
--- a/docs/linghe/utils.html
+++ b/docs/linghe/utils.html
@@ -23,6 +23,8 @@ linghe
 Submodules
@@ -60,5 +62,186 @@
diff --git a/docs/linghe/utils/add.html b/docs/linghe/utils/add.html
index 9c8fe47..e84d1b8 100644
--- a/docs/linghe/utils/add.html
+++ b/docs/linghe/utils/add.html
@@ -23,6 +23,8 @@ linghe.utils
@@ -84,5 +86,186 @@ Returns:
diff --git a/docs/linghe/utils/dot.html b/docs/linghe/utils/dot.html
index 49a7b28..67b62f7 100644
--- a/docs/linghe/utils/dot.html
+++ b/docs/linghe/utils/dot.html
@@ -23,6 +23,8 @@ linghe.utils
@@ -84,5 +86,186 @@ Returns:
diff --git a/docs/linghe/utils/gather.html b/docs/linghe/utils/gather.html
index 8f18e38..a7444c8 100644
--- a/docs/linghe/utils/gather.html
+++ b/docs/linghe/utils/gather.html
@@ -23,6 +23,8 @@ linghe.utils
@@ -315,7 +317,7 @@ Arguments:
-  gather and optional dequant and smooth quant
+  gather (and optional dequant) and smooth quant
 Arguments:
@@ -332,11 +334,199 @@ Arguments:
   • round_scale:
-  Returns:
+  Returns:
+    • output: output tensor
+    • permuted_scale: permuted scale if scale is not None
diff --git a/docs/linghe/utils/loss.html b/docs/linghe/utils/loss.html
index 73e62df..6ca4c4f 100644
--- a/docs/linghe/utils/loss.html
+++ b/docs/linghe/utils/loss.html
@@ -23,6 +23,8 @@ linghe.utils
@@ -117,5 +119,186 @@
 Returns:
diff --git a/docs/linghe/utils/norm.html b/docs/linghe/utils/norm.html
index 63e9fb9..4db407c 100644
--- a/docs/linghe/utils/norm.html
+++ b/docs/linghe/utils/norm.html
@@ -23,6 +23,8 @@ linghe.utils
@@ -118,11 +120,14 @@
 Arguments:
 Returns:
   out: quantization data
   scale: quantization scale
   rms: reciprocal of the root mean square of the input, calculated over the last dimension
   transpose_output: quantization data of transposed gradient
   transpose_scale: quantization scale of transposed gradient
@@ -160,5 +165,186 @@ Returns:
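The rms value documented above is the reciprocal of the root mean square over the last dimension. As a shape and semantics reference (a plain PyTorch sketch, not the fused kernel, and omitting the quantized outputs it also produces; the eps and weight handling are standard RMSNorm assumptions):

    import torch

    def rms_norm_reference(x: torch.Tensor, weight: torch.Tensor, eps: float = 1e-6):
        # rms matches the documented return: 1 / sqrt(mean(x**2, dim=-1) + eps)
        rms = torch.rsqrt(x.pow(2).mean(dim=-1, keepdim=True) + eps)
        out = x * rms * weight      # standard RMSNorm scaling
        return out, rms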
diff --git a/docs/linghe/utils/rearange.html b/docs/linghe/utils/rearange.html
index dab027f..ec9cc8e 100644
--- a/docs/linghe/utils/rearange.html
+++ b/docs/linghe/utils/rearange.html
@@ -23,6 +23,8 @@ linghe.utils
@@ -79,13 +81,196 @@
 Arguments:
 Returns:
   y: output tensor
   output_scales: output scales if scales is not None
diff --git a/docs/linghe/utils/reduce.html b/docs/linghe/utils/reduce.html
index d000c2d..0aeb0ae 100644
--- a/docs/linghe/utils/reduce.html
+++ b/docs/linghe/utils/reduce.html
@@ -23,6 +23,8 @@ linghe.utils
@@ -147,5 +149,186 @@ Returns:
diff --git a/docs/linghe/utils/rope.html b/docs/linghe/utils/rope.html
index df394a3..c44d6c5 100644
--- a/docs/linghe/utils/rope.html
+++ b/docs/linghe/utils/rope.html
@@ -23,6 +23,8 @@ linghe.utils
@@ -83,8 +85,10 @@
 Arguments:
 Returns:
   qo:
   ko:
@@ -121,9 +125,11 @@
 Arguments:
 Returns:
   qo: shape [B, S, H, head_dim]
   ko: shape [B, S, h, head_dim]
   vo: shape [B, S, h, head_dim]
@@ -159,14 +165,197 @@
 Arguments:
 Returns:
   dqkv: gradient of qkv
   dqw: gradient of q_norm_weight
   dkw: gradient of k_norm_weight
diff --git a/docs/linghe/utils/scatter.html b/docs/linghe/utils/scatter.html
index 68e5f39..360ddf8 100644
--- a/docs/linghe/utils/scatter.html
+++ b/docs/linghe/utils/scatter.html
@@ -23,6 +23,8 @@ linghe.utils
@@ -113,7 +115,7 @@
 Arguments:
 Returns:
-  outputs
+  output tensor
@@ -142,13 +144,196 @@
 Arguments:
 Returns:
   output: [num_tokens, hidden_size]
   restore_probs: [num_tokens, num_experts]
diff --git a/docs/linghe/utils/silu.html b/docs/linghe/utils/silu.html
index cbb5131..75bc83a 100644
--- a/docs/linghe/utils/silu.html
+++ b/docs/linghe/utils/silu.html
@@ -23,6 +23,8 @@ linghe.utils
@@ -120,8 +122,10 @@
 Arguments:
 Returns:
   dx: gradient of x
   dw: gradient of weight
@@ -153,10 +157,12 @@
 Arguments:
 Returns:
   out: quantized tensor
   scale: quantization scale
   transpose_output: quantized tensor of transposed output
   transpose_scale: quantization scale of transposed output
@@ -185,10 +191,12 @@
 Arguments:
 Returns:
   dx: quantized non-transposed gradient
   dx_scale: scales of quantization non-transposed gradient
   transpose_dx: quantized transposed gradient
   transpose_dx_scale: scales of quantization transposed gradient
@@ -223,10 +231,12 @@
 Arguments:
 Returns:
   out: quantized tensor
   scale: quantization scale
   transpose_output: quantized tensor of transposed output
   transpose_scale: quantization scale of transposed output
@@ -258,16 +268,199 @@
 Arguments:
 Returns:
   dx: quantized non-transposed gradient
   dx_scale: scales of quantization non-transposed gradient
   dw: gradient of weight
   transpose_dx: quantized transposed gradient
   transpose_dx_scale: scales of quantization transposed gradient
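The backward entries above return gradients of a SiLU-based elementwise op. For reference, the SiLU derivative that such backward kernels rely on (a generic identity, independent of the fused weight and quantization handling documented here):

    import torch

    def silu_grad_reference(x: torch.Tensor, dy: torch.Tensor) -> torch.Tensor:
        # d/dx [x * sigmoid(x)] = sigmoid(x) * (1 + x * (1 - sigmoid(x)))
        s = torch.sigmoid(x)
        return dy * s * (1 + x * (1 - s))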
diff --git a/docs/linghe/utils/transpose.html b/docs/linghe/utils/transpose.html
index b278ac2..df81d84 100644
--- a/docs/linghe/utils/transpose.html
+++ b/docs/linghe/utils/transpose.html
@@ -23,6 +23,8 @@ linghe.utils
@@ -180,5 +182,186 @@ Returns:
diff --git a/docs/search.js b/docs/search.js
index 23741f0..e1491d0 100644
--- a/docs/search.js
+++ b/docs/search.js
@@ -1,6 +1,6 @@
[Machine-generated file: the minified elasticlunr 0.9.5 search library followed by the regenerated
window.pdocSearch index. The module-level docstrings in the index are the Ant Financial Service Group
copyright notice, and the class and method docstrings are largely the inherited torch.autograd.Function,
forward, and backward boilerplate. The entries visible in this extract cover:
  linghe.facade.add: InplaceAddFunction (forward(ctx, x, y), backward(ctx, grad_output))
  linghe.facade.fp32_linear: FusedFp32GEMM (forward(ctx, input, weight), backward(ctx, grad_output))
  linghe.facade.loss: SoftmaxCrossEntropyFunction (forward(ctx, logits, labels, inplace=False));
    GradScalingFunction (forward(ctx, x, coef=0.2))
  linghe.facade.norm: RMSNormFunction (forward(ctx, x, weight, eps=1e-06));
    GroupNormGateFunction (forward(ctx, x, gate, weight, eps=1e-06, group_size=4))
  linghe.facade.rope: QkNormHalfRopeFunction (forward(ctx, qkv, q_norm_weight, k_norm_weight, freqs, H=32, h=4, eps=1e-06),
    backward(ctx, grad_q, grad_k, grad_v))
  linghe.facade.transpose: TransposeDim01Function (forward(ctx, x), backward(ctx, grad_output))
  linghe.gemm.fp32_gemm: fp32_gemm_kernel (the extract is truncated at this entry)
The minified library source and the raw index JSON are not reproduced here.]

    \n", "signature": "(\ta_ptr,\tb_ptr,\tc_ptr,\tM,\tN: int,\tK: int,\tBLOCK_SIZE_K: int,\tBLOCK_SIZE_M: int,\tBLOCK_SIZE_N: int):", "funcdef": "def"}, {"fullname": "linghe.gemm.fp32_gemm.triton_fp32_gemm", "modulename": "linghe.gemm.fp32_gemm", "qualname": "triton_fp32_gemm", "kind": "function", "doc": "

    \n", "signature": "(a: torch.Tensor, b: torch.Tensor):", "funcdef": "def"}, {"fullname": "linghe.gemm.fp32_gemm.scaled_fp32_gemm_kernel", "modulename": "linghe.gemm.fp32_gemm", "qualname": "scaled_fp32_gemm_kernel", "kind": "function", "doc": "

    \n", "signature": "(\ta_ptr,\tb_ptr,\tscale_ptr,\tc_ptr,\tM,\tN: int,\tK: int,\tBLOCK_SIZE_K: int,\tBLOCK_SIZE_M: int,\tBLOCK_SIZE_N: int):", "funcdef": "def"}, {"fullname": "linghe.gemm.fp32_gemm.triton_scaled_fp32_gemm", "modulename": "linghe.gemm.fp32_gemm", "qualname": "triton_scaled_fp32_gemm", "kind": "function", "doc": "

    \n", "signature": "(a: torch.Tensor, b: torch.Tensor, scale: torch.Tensor):", "funcdef": "def"}, {"fullname": "linghe.gemm.fp32_gemm.fp32_gemm_for_backward_kernel", "modulename": "linghe.gemm.fp32_gemm", "qualname": "fp32_gemm_for_backward_kernel", "kind": "function", "doc": "

    \n", "signature": "(\ta_ptr,\tb_ptr,\tc_ptr,\tM,\tN: int,\tK: int,\tACCUM: int,\tBLOCK_SIZE_K: int,\tBLOCK_SIZE_M: int,\tBLOCK_SIZE_N: int):", "funcdef": "def"}, {"fullname": "linghe.gemm.fp32_gemm.triton_fp32_gemm_for_backward", "modulename": "linghe.gemm.fp32_gemm", "qualname": "triton_fp32_gemm_for_backward", "kind": "function", "doc": "

    \n", "signature": "(\ta: torch.Tensor,\tb: torch.Tensor,\tc: Optional[torch.Tensor] = None,\taccum=False):", "funcdef": "def"}, {"fullname": "linghe.gemm.fp32_gemm.fp32_gemm_for_update_kernel", "modulename": "linghe.gemm.fp32_gemm", "qualname": "fp32_gemm_for_update_kernel", "kind": "function", "doc": "

    \n", "signature": "(\ta_ptr,\tb_ptr,\tc_ptr,\tM,\tN: int,\tK: int,\tBLOCK_SIZE_K: int,\tBLOCK_SIZE_M: int,\tBLOCK_SIZE_N: int):", "funcdef": "def"}, {"fullname": "linghe.gemm.fp32_gemm.triton_fp32_gemm_for_update", "modulename": "linghe.gemm.fp32_gemm", "qualname": "triton_fp32_gemm_for_update", "kind": "function", "doc": "

    \n", "signature": "(a: torch.Tensor, b: torch.Tensor):", "funcdef": "def"}, {"fullname": "linghe.gemm.fp32_gemm.scaled_fp32_gemm_for_update_kernel", "modulename": "linghe.gemm.fp32_gemm", "qualname": "scaled_fp32_gemm_for_update_kernel", "kind": "function", "doc": "

    \n", "signature": "(\ta_ptr,\tb_ptr,\tscale_ptr,\tc_ptr,\tM,\tN: int,\tK: int,\tBLOCK_SIZE_K: int,\tBLOCK_SIZE_M: int,\tBLOCK_SIZE_N: int):", "funcdef": "def"}, {"fullname": "linghe.gemm.fp32_gemm.triton_scaled_fp32_gemm_for_update", "modulename": "linghe.gemm.fp32_gemm", "qualname": "triton_scaled_fp32_gemm_for_update", "kind": "function", "doc": "

    \n", "signature": "(a: torch.Tensor, b: torch.Tensor, scale: torch.Tensor):", "funcdef": "def"}, {"fullname": "linghe.quant", "modulename": "linghe.quant", "kind": "module", "doc": "

    \n"}, {"fullname": "linghe.quant.block", "modulename": "linghe.quant.block", "kind": "module", "doc": "

    \n"}, {"fullname": "linghe.quant.block.block", "modulename": "linghe.quant.block.block", "kind": "module", "doc": "

    Copyright (c) Ant Financial Service Group and its affiliates.

    \n"}, {"fullname": "linghe.quant.block.block.block_quant_kernel", "modulename": "linghe.quant.block.block", "qualname": "block_quant_kernel", "kind": "function", "doc": "

    \n", "signature": "(x_ptr, y_ptr, s_ptr, M, N, BLOCK_SIZE: int, ROUND: int):", "funcdef": "def"}, {"fullname": "linghe.quant.block.block.block_quant", "modulename": "linghe.quant.block.block", "qualname": "block_quant", "kind": "function", "doc": "

    \n", "signature": "(x, block_size=128, round_scale=False):", "funcdef": "def"}, {"fullname": "linghe.quant.block.group", "modulename": "linghe.quant.block.group", "kind": "module", "doc": "

    Copyright (c) Ant Financial Service Group and its affiliates.

    \n"}, {"fullname": "linghe.quant.block.group.group_quant_kernel", "modulename": "linghe.quant.block.group", "qualname": "group_quant_kernel", "kind": "function", "doc": "

    \n", "signature": "(x_ptr, y_ptr, s_ptr, N, BLOCK_SIZE: int, K: int, ROUND: int):", "funcdef": "def"}, {"fullname": "linghe.quant.block.group.triton_group_quant", "modulename": "linghe.quant.block.group", "qualname": "triton_group_quant", "kind": "function", "doc": "

    \n", "signature": "(x, dtype=torch.float8_e4m3fn, group_size=128, round_scale=False):", "funcdef": "def"}, {"fullname": "linghe.quant.block.group.persist_group_quant_kernel", "modulename": "linghe.quant.block.group", "qualname": "persist_group_quant_kernel", "kind": "function", "doc": "

    \n", "signature": "(x_ptr, y_ptr, s_ptr, N, BLOCK_SIZE: int, B: int, K: int, ROUND: int):", "funcdef": "def"}, {"fullname": "linghe.quant.block.group.triton_persist_group_quant", "modulename": "linghe.quant.block.group", "qualname": "triton_persist_group_quant", "kind": "function", "doc": "

    \n", "signature": "(x, dtype=torch.float8_e4m3fn, group_size=128, round_scale=False):", "funcdef": "def"}, {"fullname": "linghe.quant.channel", "modulename": "linghe.quant.channel", "kind": "module", "doc": "

    \n"}, {"fullname": "linghe.quant.channel.channel", "modulename": "linghe.quant.channel.channel", "kind": "module", "doc": "

    Copyright (c) Ant Financial Service Group and its affiliates.

    \n"}, {"fullname": "linghe.quant.channel.channel.row_quant_kernel", "modulename": "linghe.quant.channel.channel", "qualname": "row_quant_kernel", "kind": "function", "doc": "

    \n", "signature": "(x_ptr, q_ptr, s_ptr, M, N, BLOCK_SIZE: int, ROUND: int):", "funcdef": "def"}, {"fullname": "linghe.quant.channel.channel.triton_row_quant", "modulename": "linghe.quant.channel.channel", "qualname": "triton_row_quant", "kind": "function", "doc": "

    \n", "signature": "(x, round_scale=False):", "funcdef": "def"}, {"fullname": "linghe.quant.channel.channel.deprecated_tokenwise_row_quant_kernel", "modulename": "linghe.quant.channel.channel", "qualname": "deprecated_tokenwise_row_quant_kernel", "kind": "function", "doc": "

    \n", "signature": "(x_ptr, out_ptr, scale_ptr, M, T: int, N: int, ROUND: int):", "funcdef": "def"}, {"fullname": "linghe.quant.channel.channel.triton_deprecated_tokenwise_row_quant", "modulename": "linghe.quant.channel.channel", "qualname": "triton_deprecated_tokenwise_row_quant", "kind": "function", "doc": "

    \n", "signature": "(x, out=None, scale=None, round_scale=False):", "funcdef": "def"}, {"fullname": "linghe.quant.channel.channel.tokenwise_row_quant_kernel", "modulename": "linghe.quant.channel.channel", "qualname": "tokenwise_row_quant_kernel", "kind": "function", "doc": "

    \n", "signature": "(x_ptr, out_ptr, scale_ptr, N: int, ROUND: int):", "funcdef": "def"}, {"fullname": "linghe.quant.channel.channel.triton_tokenwise_row_quant", "modulename": "linghe.quant.channel.channel", "qualname": "triton_tokenwise_row_quant", "kind": "function", "doc": "

    \n", "signature": "(x, out=None, scale=None, round_scale=False):", "funcdef": "def"}, {"fullname": "linghe.quant.channel.channel.transpose_row_quant_kernel", "modulename": "linghe.quant.channel.channel", "qualname": "transpose_row_quant_kernel", "kind": "function", "doc": "

    \n", "signature": "(x_ptr, q_ptr, s_ptr, M, N, H: int, W: int, ROUND: int):", "funcdef": "def"}, {"fullname": "linghe.quant.channel.channel.triton_transpose_row_quant", "modulename": "linghe.quant.channel.channel", "qualname": "triton_transpose_row_quant", "kind": "function", "doc": "

    \n", "signature": "(x, side=0, round_scale=False):", "funcdef": "def"}, {"fullname": "linghe.quant.channel.channel.triton_channel_quant_nt", "modulename": "linghe.quant.channel.channel", "qualname": "triton_channel_quant_nt", "kind": "function", "doc": "

    \n", "signature": "(x, w):", "funcdef": "def"}, {"fullname": "linghe.quant.channel.channel.triton_channel_quant_nn", "modulename": "linghe.quant.channel.channel", "qualname": "triton_channel_quant_nn", "kind": "function", "doc": "

    \n", "signature": "(y, w):", "funcdef": "def"}, {"fullname": "linghe.quant.channel.channel.triton_channel_quant_tn", "modulename": "linghe.quant.channel.channel", "qualname": "triton_channel_quant_tn", "kind": "function", "doc": "

    \n", "signature": "(y, x):", "funcdef": "def"}, {"fullname": "linghe.quant.channel.channel.channel_quant_forward", "modulename": "linghe.quant.channel.channel", "qualname": "channel_quant_forward", "kind": "function", "doc": "

    \n", "signature": "(x, w):", "funcdef": "def"}, {"fullname": "linghe.quant.channel.channel.channel_quant_backward", "modulename": "linghe.quant.channel.channel", "qualname": "channel_quant_backward", "kind": "function", "doc": "

    \n", "signature": "(y, w):", "funcdef": "def"}, {"fullname": "linghe.quant.channel.channel.channel_quant_update", "modulename": "linghe.quant.channel.channel", "qualname": "channel_quant_update", "kind": "function", "doc": "

    \n", "signature": "(y, x):", "funcdef": "def"}, {"fullname": "linghe.quant.channel.channel.fp8_channel_f_and_b", "modulename": "linghe.quant.channel.channel", "qualname": "fp8_channel_f_and_b", "kind": "function", "doc": "

    \n", "signature": "(x, w, y):", "funcdef": "def"}, {"fullname": "linghe.utils", "modulename": "linghe.utils", "kind": "module", "doc": "

    \n"}, {"fullname": "linghe.utils.add", "modulename": "linghe.utils.add", "kind": "module", "doc": "

    Copyright (c) Ant Financial Service Group and its affiliates.

    \n"}, {"fullname": "linghe.utils.add.inplace_add_kernel", "modulename": "linghe.utils.add", "qualname": "inplace_add_kernel", "kind": "function", "doc": "

    \n", "signature": "(x_ptr, y_ptr, M, N, H: int, W: int, EVEN: int, ACCUM: int):", "funcdef": "def"}, {"fullname": "linghe.utils.add.triton_inplace_add", "modulename": "linghe.utils.add", "qualname": "triton_inplace_add", "kind": "function", "doc": "

in-place add y to x\nArgs:\n    x: Tensor\n    y: Tensor\n    accum: whether to accumulate y into x

    \n\n

    Returns: x += y if accum=True else x.copy_(y)

    \n", "signature": "(x: torch.Tensor, y: torch.Tensor, accum: bool = True):", "funcdef": "def"}, {"fullname": "linghe.utils.dot", "modulename": "linghe.utils.dot", "kind": "module", "doc": "

    Copyright (c) Ant Financial Service Group and its affiliates.

    \n"}, {"fullname": "linghe.utils.dot.dot_kernel", "modulename": "linghe.utils.dot", "qualname": "dot_kernel", "kind": "function", "doc": "

    \n", "signature": "(x_ptr, y_ptr, sum_ptr, M, N, H: int, W: int):", "funcdef": "def"}, {"fullname": "linghe.utils.dot.triton_dot", "modulename": "linghe.utils.dot", "qualname": "triton_dot", "kind": "function", "doc": "

    \n", "signature": "(x, y):", "funcdef": "def"}, {"fullname": "linghe.utils.dot.mix_precise_dot_kernel", "modulename": "linghe.utils.dot", "qualname": "mix_precise_dot_kernel", "kind": "function", "doc": "

    \n", "signature": "(\tx_ptr,\tq_ptr,\tsum_ptr,\tsmooth_scale_ptr,\tquant_scale_ptr,\tM,\tN,\tH: int,\tW: int):", "funcdef": "def"}, {"fullname": "linghe.utils.dot.triton_mix_precise_dot", "modulename": "linghe.utils.dot", "qualname": "triton_mix_precise_dot", "kind": "function", "doc": "

    \n", "signature": "(x, q, smooth_scale, quant_scale, reverse=False):", "funcdef": "def"}, {"fullname": "linghe.utils.gather", "modulename": "linghe.utils.gather", "kind": "module", "doc": "

    Copyright (c) Ant Financial Service Group and its affiliates.

    \n"}, {"fullname": "linghe.utils.gather.block_count_kernel", "modulename": "linghe.utils.gather", "qualname": "block_count_kernel", "kind": "function", "doc": "

    \n", "signature": "(map_ptr, count_ptr, M, B, T: int, b: int, E: int):", "funcdef": "def"}, {"fullname": "linghe.utils.gather.make_row_id_map_kernel", "modulename": "linghe.utils.gather", "qualname": "make_row_id_map_kernel", "kind": "function", "doc": "

    \n", "signature": "(map_ptr, count_ptr, output_ptr, M, B, P, T: int, b: int, E: int):", "funcdef": "def"}, {"fullname": "linghe.utils.gather.triton_make_row_id_map", "modulename": "linghe.utils.gather", "qualname": "triton_make_row_id_map", "kind": "function", "doc": "

    \n", "signature": "(routing_map: torch.Tensor, multiple_of: int = 1):", "funcdef": "def"}, {"fullname": "linghe.utils.gather.make_row_id_map_and_indices_kernel", "modulename": "linghe.utils.gather", "qualname": "make_row_id_map_and_indices_kernel", "kind": "function", "doc": "

    \n", "signature": "(\tmap_ptr,\tcount_ptr,\trow_map_ptr,\trow_indices_ptr,\tM,\tB,\tP,\tT: int,\tb: int,\tE: int):", "funcdef": "def"}, {"fullname": "linghe.utils.gather.triton_make_row_id_map_and_indices", "modulename": "linghe.utils.gather", "qualname": "triton_make_row_id_map_and_indices", "kind": "function", "doc": "

    \n", "signature": "(routing_map: torch.Tensor, num_out_tokens: int, multiple_of: int = 1):", "funcdef": "def"}, {"fullname": "linghe.utils.gather.index_select_kernel", "modulename": "linghe.utils.gather", "qualname": "index_select_kernel", "kind": "function", "doc": "

    \n", "signature": "(\tx_ptr,\tout_ptr,\tscale_ptr,\tscale_out_ptr,\tindex_ptr,\tM,\tT,\tN: int,\tSCALE: int):", "funcdef": "def"}, {"fullname": "linghe.utils.gather.triton_index_select", "modulename": "linghe.utils.gather", "qualname": "triton_index_select", "kind": "function", "doc": "

    \n", "signature": "(x, indices, scale=None, out=None, scale_out=None):", "funcdef": "def"}, {"fullname": "linghe.utils.gather.permute_with_mask_map_kernel", "modulename": "linghe.utils.gather", "qualname": "permute_with_mask_map_kernel", "kind": "function", "doc": "

    \n", "signature": "(\tdata_ptr,\tscale_ptr,\tprobs_ptr,\tmask_map_ptr,\toutput_data_ptr,\toutput_scale_ptr,\toutput_probs_ptr,\tnum_experts: int,\tN: int,\ths: int,\tSCALE: int,\tPROB: int):", "funcdef": "def"}, {"fullname": "linghe.utils.gather.fill_padded_token_with_zero_kernel", "modulename": "linghe.utils.gather", "qualname": "fill_padded_token_with_zero_kernel", "kind": "function", "doc": "

    \n", "signature": "(\tdata_ptr,\tscale_ptr,\tprobs_ptr,\tmax_indices_ptr,\ttoken_per_expert_ptr,\tN: int,\ths: int,\tSCALE: int,\tPROB: int):", "funcdef": "def"}, {"fullname": "linghe.utils.gather.triton_permute_with_mask_map", "modulename": "linghe.utils.gather", "qualname": "triton_permute_with_mask_map", "kind": "function", "doc": "

    \n", "signature": "(\tinp: torch.Tensor,\tscale: torch.Tensor,\tprobs: torch.Tensor,\trow_id_map: torch.Tensor,\tnum_out_tokens: int,\tcontiguous: bool = True,\ttokens_per_expert: Optional[torch.Tensor] = None):", "funcdef": "def"}, {"fullname": "linghe.utils.gather.batch_smooth_transpose_smooth_permute_kernel", "modulename": "linghe.utils.gather", "qualname": "batch_smooth_transpose_smooth_permute_kernel", "kind": "function", "doc": "

    \n", "signature": "(\tx_ptr,\tscale_ptr,\toss_ptr,\tss_ptr,\tindex_ptr,\tcount_ptr,\taccum_ptr,\tq_ptr,\tqs_ptr,\tN: int,\tE: int,\tH: int,\tW: int,\tSMOOTHED: int,\tROUND: int):", "funcdef": "def"}, {"fullname": "linghe.utils.gather.triton_batch_transpose_smooth_permute_with_indices", "modulename": "linghe.utils.gather", "qualname": "triton_batch_transpose_smooth_permute_with_indices", "kind": "function", "doc": "

    \n", "signature": "(\tx,\tscale,\torg_smooth_scale,\tsmooth_scales,\tindices,\ttoken_count_per_expert,\tsplits,\tx_q=None,\tx_scale=None,\tround_scale=False):", "funcdef": "def"}, {"fullname": "linghe.utils.gather.smooth_weighted_permute_with_indices_kernel", "modulename": "linghe.utils.gather", "qualname": "smooth_weighted_permute_with_indices_kernel", "kind": "function", "doc": "

    \n", "signature": "(\tgrads_ptr,\ttokens_ptr,\tq_ptr,\tss_ptr,\tqs_ptr,\tcount_ptr,\taccum_ptr,\tindex_ptr,\tsum_ptr,\tM,\tN: int,\tREVERSE: int,\tROUND: int):", "funcdef": "def"}, {"fullname": "linghe.utils.gather.triton_smooth_weighted_permute_with_indices", "modulename": "linghe.utils.gather", "qualname": "triton_smooth_weighted_permute_with_indices", "kind": "function", "doc": "

    \n", "signature": "(\tgrads,\ttokens,\tsmooth_scales,\ttoken_count_per_expert,\tindices,\tx_q=None,\tx_scale=None,\tx_sum=None,\treverse=False,\tround_scale=False):", "funcdef": "def"}, {"fullname": "linghe.utils.gather.smooth_permute_with_indices_kernel", "modulename": "linghe.utils.gather", "qualname": "smooth_permute_with_indices_kernel", "kind": "function", "doc": "

    \n", "signature": "(\tgrads_data_ptr,\tgrads_scale_ptr,\tq_ptr,\tss_ptr,\tqs_ptr,\tcount_ptr,\taccum_ptr,\tindex_ptr,\tN: int,\ths: int,\tREVERSE: int,\tROUND: int,\tGROUP: int):", "funcdef": "def"}, {"fullname": "linghe.utils.gather.triton_smooth_permute_with_indices", "modulename": "linghe.utils.gather", "qualname": "triton_smooth_permute_with_indices", "kind": "function", "doc": "

    \n", "signature": "(\tgrad_data,\tgrad_scale,\tsmooth_scales,\ttoken_count_per_expert,\tindices,\tx_q=None,\tx_scale=None,\treverse=False,\tround_scale=False):", "funcdef": "def"}, {"fullname": "linghe.utils.gather.smooth_permute_with_mask_map_kernel", "modulename": "linghe.utils.gather", "qualname": "smooth_permute_with_mask_map_kernel", "kind": "function", "doc": "

    \n", "signature": "(\tgrads_data_ptr,\tquant_data_ptr,\tmask_map_ptr,\tgrads_scale_ptr,\tsmooth_scale_ptr,\tquant_scale_ptr,\tM,\tT,\tN: int,\ths: int,\tREVERSE: int,\tROUND: int):", "funcdef": "def"}, {"fullname": "linghe.utils.gather.triton_smooth_permute_with_mask_map", "modulename": "linghe.utils.gather", "qualname": "triton_smooth_permute_with_mask_map", "kind": "function", "doc": "

    \n", "signature": "(\tinp: torch.Tensor,\trow_id_map: torch.Tensor,\tscale: torch.Tensor,\tnum_tokens: int,\tnum_experts: int,\tnum_out_tokens: int,\thidden_size: int,\tsmooth_scales: torch.Tensor,\treverse=True,\tround_scale=False):", "funcdef": "def"}, {"fullname": "linghe.utils.gather.deprecated_smooth_permute_with_mask_map_kernel", "modulename": "linghe.utils.gather", "qualname": "deprecated_smooth_permute_with_mask_map_kernel", "kind": "function", "doc": "

    \n", "signature": "(\tgrads_data_ptr,\tquant_data_ptr,\tmask_map_ptr,\tsmooth_scale_ptr,\tquant_scale_ptr,\tM,\tT,\tN: int,\tREVERSE: int,\tROUND: int):", "funcdef": "def"}, {"fullname": "linghe.utils.gather.triton_deprecated_smooth_permute_with_mask_map", "modulename": "linghe.utils.gather", "qualname": "triton_deprecated_smooth_permute_with_mask_map", "kind": "function", "doc": "

    \n", "signature": "(\tinp: torch.Tensor,\trow_id_map: torch.Tensor,\tnum_tokens: int,\tnum_experts: int,\tnum_out_tokens: int,\thidden_size: int,\tsmooth_scales: torch.Tensor,\treverse=True,\tround_scale=False):", "funcdef": "def"}, {"fullname": "linghe.utils.loss", "modulename": "linghe.utils.loss", "kind": "module", "doc": "

    Copyright (c) Ant Financial Service Group and its affiliates.

    \n"}, {"fullname": "linghe.utils.loss.softmax_cross_entropy_forward_kernel", "modulename": "linghe.utils.loss", "qualname": "softmax_cross_entropy_forward_kernel", "kind": "function", "doc": "

    \n", "signature": "(\tlogit_ptr,\tlabel_ptr,\tloss_ptr,\tsum_exp_ptr,\tmax_logit_ptr,\tN,\tB: int):", "funcdef": "def"}, {"fullname": "linghe.utils.loss.triton_softmax_cross_entropy_forward", "modulename": "linghe.utils.loss", "qualname": "triton_softmax_cross_entropy_forward", "kind": "function", "doc": "

    \n", "signature": "(logits, labels):", "funcdef": "def"}, {"fullname": "linghe.utils.loss.softmax_cross_entropy_backward_kernel", "modulename": "linghe.utils.loss", "qualname": "softmax_cross_entropy_backward_kernel", "kind": "function", "doc": "

    \n", "signature": "(\tlogit_ptr,\tlabel_ptr,\tsum_exp_ptr,\tmax_logit_ptr,\tinput_grad_ptr,\toutput_grad_ptr,\tN,\tB: int):", "funcdef": "def"}, {"fullname": "linghe.utils.loss.triton_softmax_cross_entropy_backward", "modulename": "linghe.utils.loss", "qualname": "triton_softmax_cross_entropy_backward", "kind": "function", "doc": "

    \n", "signature": "(logits, labels, sum_exp, max_logit, input_grad, output_grad=None):", "funcdef": "def"}, {"fullname": "linghe.utils.norm", "modulename": "linghe.utils.norm", "kind": "module", "doc": "

    \n"}, {"fullname": "linghe.utils.norm.rms_norm_forward_kernel", "modulename": "linghe.utils.norm", "qualname": "rms_norm_forward_kernel", "kind": "function", "doc": "

    \n", "signature": "(x_ptr, weight_ptr, out_ptr, eps, M, T, N: int, W: int):", "funcdef": "def"}, {"fullname": "linghe.utils.norm.triton_rms_norm_forward", "modulename": "linghe.utils.norm", "qualname": "triton_rms_norm_forward", "kind": "function", "doc": "

    \n", "signature": "(x, weight, eps=1e-06, out=None):", "funcdef": "def"}, {"fullname": "linghe.utils.norm.rms_norm_backward_kernel", "modulename": "linghe.utils.norm", "qualname": "rms_norm_backward_kernel", "kind": "function", "doc": "

    \n", "signature": "(\tgrad_output_ptr,\tx_ptr,\tw_ptr,\tdx_ptr,\tdw_ptr,\teps,\tM,\tT,\tN: int,\tW: int):", "funcdef": "def"}, {"fullname": "linghe.utils.norm.triton_rms_norm_backward", "modulename": "linghe.utils.norm", "qualname": "triton_rms_norm_backward", "kind": "function", "doc": "

    \n", "signature": "(grad_output, x, w, eps=1e-06):", "funcdef": "def"}, {"fullname": "linghe.utils.norm.rms_norm_and_block_quant_forward_kernel", "modulename": "linghe.utils.norm", "qualname": "rms_norm_and_block_quant_forward_kernel", "kind": "function", "doc": "

    \n", "signature": "(\tx_ptr,\tweight_ptr,\tout_ptr,\tscale_ptr,\ttranspose_output_ptr,\ttranspose_scale_ptr,\trms_ptr,\teps,\tM,\tT: int,\tN: int,\tnb: int,\tW: int,\tH: int,\tROUND: int):", "funcdef": "def"}, {"fullname": "linghe.utils.norm.rms_norm_and_block_quant_forward_n_kernel", "modulename": "linghe.utils.norm", "qualname": "rms_norm_and_block_quant_forward_n_kernel", "kind": "function", "doc": "

    \n", "signature": "(\tx_ptr,\tweight_ptr,\tout_ptr,\tscale_ptr,\trms_ptr,\teps,\tM: int,\tT: int,\tN: int,\tnb: int,\tW: int,\tROUND: int):", "funcdef": "def"}, {"fullname": "linghe.utils.norm.rms_norm_and_block_quant_forward_t_kernel", "modulename": "linghe.utils.norm", "qualname": "rms_norm_and_block_quant_forward_t_kernel", "kind": "function", "doc": "

    \n", "signature": "(\tx_ptr,\tweight_ptr,\ttranspose_output_ptr,\ttranspose_scale_ptr,\trms_ptr,\tM,\tN,\tW: int,\tROUND: int):", "funcdef": "def"}, {"fullname": "linghe.utils.norm.triton_rms_norm_and_block_quant_forward", "modulename": "linghe.utils.norm", "qualname": "triton_rms_norm_and_block_quant_forward", "kind": "function", "doc": "

Fused RMSNorm forward and block quantization.\nArgs:\n    x: input tensor, shape [M, N]\n    weight: RMSNorm weight, shape [N]\n    eps: epsilon value for L2 normalization.\n    out: optional output buffer for the quantized data\n    scale: optional output buffer for the quantization scale\n    rms: optional output buffer for the rms\n    round_scale: whether to force power-of-2 scales\n    output_mode: one of {0, 1, 2}.\n        0: output only the non-transposed tensor\n        1: output only the transposed tensor\n        2: return both\nReturns:\n    out: quantized data\n    scale: quantization scale\n    rms: reciprocal of the root mean square of the input, computed over the last dimension\n    transpose_output: quantized data of the transposed output\n    transpose_scale: quantization scale of the transposed output

    \n", "signature": "(\tx: torch.Tensor,\tweight: torch.Tensor,\teps: float = 1e-06,\tout: Optional[torch.Tensor] = None,\tscale: Optional[torch.Tensor] = None,\trms: Optional[torch.Tensor] = None,\tround_scale: bool = False,\toutput_mode: int = 2):", "funcdef": "def"}, {"fullname": "linghe.utils.norm.group_norm_gate_forward_kernel", "modulename": "linghe.utils.norm", "qualname": "group_norm_gate_forward_kernel", "kind": "function", "doc": "

    \n", "signature": "(\tx_ptr,\tgate_ptr,\tweight_ptr,\tout_ptr,\teps,\tbs,\tlength,\tDIM: int,\tD: int,\tGROUP_SIZE: int):", "funcdef": "def"}, {"fullname": "linghe.utils.norm.triton_group_norm_gate_forward", "modulename": "linghe.utils.norm", "qualname": "triton_group_norm_gate_forward", "kind": "function", "doc": "

group RMS norm and gating for linear attention\nArgs:\n    x: attention output tensor\n    gate: gate tensor\n    weight: RMS norm weight\n    eps: epsilon for RMS\n    group_size: group size of the group RMS norm

    \n\n

    Returns:

    \n", "signature": "(x: torch.Tensor, gate, weight, eps=1e-06, group_size=4):", "funcdef": "def"}, {"fullname": "linghe.utils.norm.group_rms_gate_backward_kernel", "modulename": "linghe.utils.norm", "qualname": "group_rms_gate_backward_kernel", "kind": "function", "doc": "

    \n", "signature": "(\tgrad_output_ptr,\tx_ptr,\tgate_ptr,\tw_ptr,\tdx_ptr,\tdg_ptr,\tdw_ptr,\teps,\tbs,\tlength,\tDIM: int,\tD: int,\tGROUP_SIZE: int,\tT: int):", "funcdef": "def"}, {"fullname": "linghe.utils.norm.triton_group_norm_gate_backward", "modulename": "linghe.utils.norm", "qualname": "triton_group_norm_gate_backward", "kind": "function", "doc": "

    \n", "signature": "(grad_output, x, gate, weight, eps=1e-06, group_size=4):", "funcdef": "def"}, {"fullname": "linghe.utils.rearange", "modulename": "linghe.utils.rearange", "kind": "module", "doc": "

    Copyright (c) Ant Financial Service Group and its affiliates.

    \n"}, {"fullname": "linghe.utils.rearange.split_and_cat_kernel", "modulename": "linghe.utils.rearange", "qualname": "split_and_cat_kernel", "kind": "function", "doc": "

    \n", "signature": "(\tx_ptr,\ty_ptr,\tscale_ptr,\tscale_output_ptr,\tcount_ptr,\taccum_ptr,\trev_accum_ptr,\tindex_ptr,\tM,\tN: int,\tSCALE: int,\tK: int):", "funcdef": "def"}, {"fullname": "linghe.utils.rearange.triton_split_and_cat", "modulename": "linghe.utils.rearange", "qualname": "triton_split_and_cat", "kind": "function", "doc": "

    \n", "signature": "(x, counts, indices, scales=None):", "funcdef": "def"}, {"fullname": "linghe.utils.reduce", "modulename": "linghe.utils.reduce", "kind": "module", "doc": "

    Copyright (c) Ant Financial Service Group and its affiliates.

    \n"}, {"fullname": "linghe.utils.reduce.abs_max_kernel", "modulename": "linghe.utils.reduce", "qualname": "abs_max_kernel", "kind": "function", "doc": "

    \n", "signature": "(\tx_ptr,\tscale_ptr,\tsmooth_scale_ptr,\toutput_ptr,\tmin_value,\tM,\tN,\tH: int,\tW: int,\tEVEN: int,\tQUANTIZED: int):", "funcdef": "def"}, {"fullname": "linghe.utils.reduce.triton_abs_max", "modulename": "linghe.utils.reduce", "qualname": "triton_abs_max", "kind": "function", "doc": "

    \n", "signature": "(x, scale=None, smooth_scale=None, min_value=1e-30, axis=0):", "funcdef": "def"}, {"fullname": "linghe.utils.reduce.batch_count_zero_kernel", "modulename": "linghe.utils.reduce", "qualname": "batch_count_zero_kernel", "kind": "function", "doc": "

    \n", "signature": "(input_ptrs, size_ptr, count_ptr, B: int):", "funcdef": "def"}, {"fullname": "linghe.utils.reduce.triton_batch_count_zero", "modulename": "linghe.utils.reduce", "qualname": "triton_batch_count_zero", "kind": "function", "doc": "

    \n", "signature": "(xs):", "funcdef": "def"}, {"fullname": "linghe.utils.reduce.batch_sum_with_ord_kernel", "modulename": "linghe.utils.reduce", "qualname": "batch_sum_with_ord_kernel", "kind": "function", "doc": "

    \n", "signature": "(input_ptrs, size_ptr, count_ptr, B: int, ORD: int):", "funcdef": "def"}, {"fullname": "linghe.utils.reduce.triton_batch_sum_with_ord", "modulename": "linghe.utils.reduce", "qualname": "triton_batch_sum_with_ord", "kind": "function", "doc": "

    \n", "signature": "(xs, ord=2):", "funcdef": "def"}, {"fullname": "linghe.utils.rope", "modulename": "linghe.utils.rope", "kind": "module", "doc": "

    Copyright (c) Ant Financial Service Group and its affiliates.

    \n"}, {"fullname": "linghe.utils.rope.half_rope_forward_kernel", "modulename": "linghe.utils.rope", "qualname": "half_rope_forward_kernel", "kind": "function", "doc": "

    \n", "signature": "(\tq_ptr,\tk_ptr,\tfreqs_ptr,\tqo_ptr,\tko_ptr,\tB,\tq_stride,\tk_stride,\tH: int,\th: int,\tD: int,\td: int):", "funcdef": "def"}, {"fullname": "linghe.utils.rope.triton_half_rope_forward", "modulename": "linghe.utils.rope", "qualname": "triton_half_rope_forward", "kind": "function", "doc": "

    \n", "signature": "(q, k, freqs):", "funcdef": "def"}, {"fullname": "linghe.utils.rope.half_rope_backward_kernel", "modulename": "linghe.utils.rope", "qualname": "half_rope_backward_kernel", "kind": "function", "doc": "

    \n", "signature": "(q_ptr, k_ptr, freqs_ptr, B, H: int, h: int, D: int, d: int):", "funcdef": "def"}, {"fullname": "linghe.utils.rope.triton_half_rope_backward", "modulename": "linghe.utils.rope", "qualname": "triton_half_rope_backward", "kind": "function", "doc": "

    \n", "signature": "(q_grad, k_grad, freqs, inplace=False):", "funcdef": "def"}, {"fullname": "linghe.utils.rope.qk_norm_and_half_rope_forward_kernel", "modulename": "linghe.utils.rope", "qualname": "qk_norm_and_half_rope_forward_kernel", "kind": "function", "doc": "

    \n", "signature": "(\tqkv_ptr,\tq_norm_weight_ptr,\tk_norm_weight_ptr,\tfreqs_ptr,\tqo_ptr,\tko_ptr,\tvo_ptr,\tB,\tstride,\teps,\tH: int,\th: int,\tD: int,\td: int,\tinterleave: int):", "funcdef": "def"}, {"fullname": "linghe.utils.rope.triton_qk_norm_and_half_rope_forward", "modulename": "linghe.utils.rope", "qualname": "triton_qk_norm_and_half_rope_forward", "kind": "function", "doc": "

    \n", "signature": "(\tqkv,\tq_norm_weight,\tk_norm_weight,\tfreqs,\tH=32,\th=4,\teps=1e-06,\tinterleave=True,\ttranspose=False):", "funcdef": "def"}, {"fullname": "linghe.utils.rope.qk_norm_and_half_rope_backward_kernel", "modulename": "linghe.utils.rope", "qualname": "qk_norm_and_half_rope_backward_kernel", "kind": "function", "doc": "

    \n", "signature": "(\tgq_ptr,\tgk_ptr,\tgv_ptr,\tqkv_ptr,\tq_norm_weight_ptr,\tk_norm_weight_ptr,\tfreqs_ptr,\tdqkv_ptr,\tdqw_ptr,\tdkw_ptr,\tB,\tstride,\teps,\tH: int,\th: int,\tD: int,\td: int,\tinterleave: int):", "funcdef": "def"}, {"fullname": "linghe.utils.rope.triton_qk_norm_and_half_rope_backward", "modulename": "linghe.utils.rope", "qualname": "triton_qk_norm_and_half_rope_backward", "kind": "function", "doc": "

    \n", "signature": "(\tgq,\tgk,\tgv,\tqkv,\tq_norm_weight,\tk_norm_weight,\tfreqs,\teps=1e-06,\ttranspose=False,\tinterleave=True):", "funcdef": "def"}, {"fullname": "linghe.utils.scatter", "modulename": "linghe.utils.scatter", "kind": "module", "doc": "

    Copyright (c) Ant Financial Service Group and its affiliates.

    \n"}, {"fullname": "linghe.utils.scatter.aligned_scatter_add_kernel", "modulename": "linghe.utils.scatter", "qualname": "aligned_scatter_add_kernel", "kind": "function", "doc": "

    \n", "signature": "(\tx_ptr,\to_ptr,\tindices_ptr,\tweights_ptr,\tM,\tN: int,\tK: int,\tSCALE: int):", "funcdef": "def"}, {"fullname": "linghe.utils.scatter.triton_aligned_scatter_add", "modulename": "linghe.utils.scatter", "qualname": "triton_aligned_scatter_add", "kind": "function", "doc": "

    \n", "signature": "(x, outputs, indices, weights=None):", "funcdef": "def"}, {"fullname": "linghe.utils.scatter.scatter_add_kernel", "modulename": "linghe.utils.scatter", "qualname": "scatter_add_kernel", "kind": "function", "doc": "

    \n", "signature": "(x_ptr, o_ptr, indices_ptr, M, T, N: int):", "funcdef": "def"}, {"fullname": "linghe.utils.scatter.fp32_to_bf16_kernel", "modulename": "linghe.utils.scatter", "qualname": "fp32_to_bf16_kernel", "kind": "function", "doc": "

    \n", "signature": "(x_ptr, o_ptr, M, T, N: int):", "funcdef": "def"}, {"fullname": "linghe.utils.scatter.triton_scatter_add", "modulename": "linghe.utils.scatter", "qualname": "triton_scatter_add", "kind": "function", "doc": "

    \n", "signature": "(x, outputs, indices):", "funcdef": "def"}, {"fullname": "linghe.utils.scatter.unpermute_with_mask_map_kernel", "modulename": "linghe.utils.scatter", "qualname": "unpermute_with_mask_map_kernel", "kind": "function", "doc": "

    \n", "signature": "(\tgrads_ptr,\tprobs_ptr,\tmask_map_ptr,\toutput_ptr,\toutput_probs_ptr,\tnum_experts: int,\tN: int,\tPROB: int):", "funcdef": "def"}, {"fullname": "linghe.utils.scatter.triton_unpermute_with_mask_map", "modulename": "linghe.utils.scatter", "qualname": "triton_unpermute_with_mask_map", "kind": "function", "doc": "

    \n", "signature": "(grad: torch.Tensor, row_id_map: torch.Tensor, probs: torch.Tensor):", "funcdef": "def"}, {"fullname": "linghe.utils.silu", "modulename": "linghe.utils.silu", "kind": "module", "doc": "

    Copyright (c) Ant Financial Service Group and its affiliates.

    \n"}, {"fullname": "linghe.utils.silu.silu_and_block_quant_forward_kernel", "modulename": "linghe.utils.silu", "qualname": "silu_and_block_quant_forward_kernel", "kind": "function", "doc": "

    \n", "signature": "(\tx_ptr,\tout_ptr,\tscale_ptr,\ttranspose_output_ptr,\ttranspose_scale_ptr,\tM,\tn: int,\tROUND: int,\tOUTPUT_MODE: int):", "funcdef": "def"}, {"fullname": "linghe.utils.silu.triton_silu_and_block_quant_forward", "modulename": "linghe.utils.silu", "qualname": "triton_silu_and_block_quant_forward", "kind": "function", "doc": "

    \n", "signature": "(x, out=None, scale=None, round_scale=False, output_mode=2):", "funcdef": "def"}, {"fullname": "linghe.utils.silu.silu_and_block_quant_backward_kernel", "modulename": "linghe.utils.silu", "qualname": "silu_and_block_quant_backward_kernel", "kind": "function", "doc": "

    \n", "signature": "(\tg_ptr,\tx_ptr,\tdx_ptr,\tdx_scale_ptr,\ttranspose_dx_ptr,\ttranspose_dx_scale_ptr,\tM,\tn: int,\tROUND: int):", "funcdef": "def"}, {"fullname": "linghe.utils.silu.triton_silu_and_block_quant_backward", "modulename": "linghe.utils.silu", "qualname": "triton_silu_and_block_quant_backward", "kind": "function", "doc": "

    \n", "signature": "(g, x, round_scale=False):", "funcdef": "def"}, {"fullname": "linghe.utils.silu.batch_weighted_silu_and_block_quant_forward_kernel", "modulename": "linghe.utils.silu", "qualname": "batch_weighted_silu_and_block_quant_forward_kernel", "kind": "function", "doc": "

    \n", "signature": "(\tx_ptr,\tweight_ptr,\tout_ptr,\tscale_ptr,\ttranspose_output_ptr,\ttranspose_scale_ptr,\tcount_ptr,\taccum_ptr,\tn: int,\tE: int,\tROUND: int,\tOUTPUT_MODE: int):", "funcdef": "def"}, {"fullname": "linghe.utils.silu.triton_batch_weighted_silu_and_block_quant_forward", "modulename": "linghe.utils.silu", "qualname": "triton_batch_weighted_silu_and_block_quant_forward", "kind": "function", "doc": "

    \n", "signature": "(\tx,\tweight,\tcounts,\tsplits=None,\tout=None,\tscale=None,\tround_scale=False,\toutput_mode=2):", "funcdef": "def"}, {"fullname": "linghe.utils.silu.batch_weighted_silu_and_block_quant_backward_kernel", "modulename": "linghe.utils.silu", "qualname": "batch_weighted_silu_and_block_quant_backward_kernel", "kind": "function", "doc": "

    \n", "signature": "(\tg_ptr,\tx_ptr,\tweight_ptr,\tcount_ptr,\taccum_ptr,\tdx_ptr,\tdx_scale_ptr,\ttranspose_dx_ptr,\ttranspose_dx_scale_ptr,\tdw_ptr,\tn: int,\tE: int,\tROUND: int):", "funcdef": "def"}, {"fullname": "linghe.utils.silu.triton_batch_weighted_silu_and_block_quant_backward", "modulename": "linghe.utils.silu", "qualname": "triton_batch_weighted_silu_and_block_quant_backward", "kind": "function", "doc": "

    \n", "signature": "(g, x, weight, counts, splits=None, round_scale=False):", "funcdef": "def"}, {"fullname": "linghe.utils.transpose", "modulename": "linghe.utils.transpose", "kind": "module", "doc": "

    Copyright (c) Ant Financial Service Group and its affiliates.

    \n"}, {"fullname": "linghe.utils.transpose.deprecated_transpose_kernel", "modulename": "linghe.utils.transpose", "qualname": "deprecated_transpose_kernel", "kind": "function", "doc": "

    \n", "signature": "(x_ptr, t_ptr, M, N, H: int, W: int, EVEN: int):", "funcdef": "def"}, {"fullname": "linghe.utils.transpose.triton_depracated_transpose", "modulename": "linghe.utils.transpose", "qualname": "triton_depracated_transpose", "kind": "function", "doc": "

    \n", "signature": "(x):", "funcdef": "def"}, {"fullname": "linghe.utils.transpose.transpose_kernel", "modulename": "linghe.utils.transpose", "qualname": "transpose_kernel", "kind": "function", "doc": "

    \n", "signature": "(x_ptr, t_ptr, M, N, H: int, W: int, EVEN: int):", "funcdef": "def"}, {"fullname": "linghe.utils.transpose.transpose_dim_0_1_kernel", "modulename": "linghe.utils.transpose", "qualname": "transpose_dim_0_1_kernel", "kind": "function", "doc": "

    \n", "signature": "(x_ptr, t_ptr, B, M, b_stride, m_stride, N: int):", "funcdef": "def"}, {"fullname": "linghe.utils.transpose.triton_transpose", "modulename": "linghe.utils.transpose", "qualname": "triton_transpose", "kind": "function", "doc": "

    \n", "signature": "(x, dim0=None, dim1=None):", "funcdef": "def"}, {"fullname": "linghe.utils.transpose.transpose_and_pad_kernel", "modulename": "linghe.utils.transpose", "qualname": "transpose_and_pad_kernel", "kind": "function", "doc": "

    \n", "signature": "(x_ptr, t_ptr, M, N, P, H: int, W: int, EVEN: int):", "funcdef": "def"}, {"fullname": "linghe.utils.transpose.triton_transpose_and_pad", "modulename": "linghe.utils.transpose", "qualname": "triton_transpose_and_pad", "kind": "function", "doc": "

    \n", "signature": "(x, out=None, pad=True):", "funcdef": "def"}, {"fullname": "linghe.utils.transpose.batch_transpose_kernel", "modulename": "linghe.utils.transpose", "qualname": "batch_transpose_kernel", "kind": "function", "doc": "

    \n", "signature": "(xs_ptr, xts_ptr, M, N, H: int, W: int):", "funcdef": "def"}, {"fullname": "linghe.utils.transpose.triton_batch_transpose", "modulename": "linghe.utils.transpose", "qualname": "triton_batch_transpose", "kind": "function", "doc": "

    \n", "signature": "(xs, xts=None):", "funcdef": "def"}, {"fullname": "linghe.utils.transpose.batch_transpose_and_pad_kernel", "modulename": "linghe.utils.transpose", "qualname": "batch_transpose_and_pad_kernel", "kind": "function", "doc": "

    \n", "signature": "(x_ptr, t_ptr, count_ptr, accum_ptr, pad_accum_ptr, N, H: int, W: int):", "funcdef": "def"}, {"fullname": "linghe.utils.transpose.triton_batch_transpose_and_pad", "modulename": "linghe.utils.transpose", "qualname": "triton_batch_transpose_and_pad", "kind": "function", "doc": "

    \n", "signature": "(x, count_list, x_t=None, pad=True):", "funcdef": "def"}, {"fullname": "linghe.utils.transpose.configs", "modulename": "linghe.utils.transpose", "qualname": "configs", "kind": "variable", "doc": "

    \n", "default_value": "[<triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config 
object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>, <triton.Config object>]"}, {"fullname": "linghe.utils.transpose.opt_transpose_kernel", "modulename": "linghe.utils.transpose", "qualname": "opt_transpose_kernel", "kind": "function", "doc": "

    \n", "signature": "(x_ptr, t_ptr, M, N, D, H: int, W: int):", "funcdef": "def"}, {"fullname": "linghe.utils.transpose.triton_opt_transpose", "modulename": "linghe.utils.transpose", "qualname": "triton_opt_transpose", "kind": "function", "doc": "

    \n", "signature": "(x):", "funcdef": "def"}]; + /** pdoc search index */const docs = [{"fullname": "linghe", "modulename": "linghe", "kind": "module", "doc": "

    \n"}, {"fullname": "linghe.facade", "modulename": "linghe.facade", "kind": "module", "doc": "

    \n"}, {"fullname": "linghe.facade.add", "modulename": "linghe.facade.add", "kind": "module", "doc": "

    Copyright (c) Ant Financial Service Group and its affiliates.

    \n"}, {"fullname": "linghe.facade.add.inplace_add", "modulename": "linghe.facade.add", "qualname": "inplace_add", "kind": "function", "doc": "

in-place add y to x with mixed precision

    \n\n
    Arguments:
    \n\n
      \n
• x: tensor to be updated
    • \n
• y: tensor to add to x
    • \n
    \n\n
    Returns:
    \n\n
    \n

    updated x tensor

    \n
    \n", "signature": "(x: torch.Tensor, y: torch.Tensor):", "funcdef": "def"}, {"fullname": "linghe.facade.fp32_gemm", "modulename": "linghe.facade.fp32_gemm", "kind": "module", "doc": "

    Copyright (c) Ant Financial Service Group and its affiliates.

    \n"}, {"fullname": "linghe.facade.fp32_gemm.fp32_gemm", "modulename": "linghe.facade.fp32_gemm", "qualname": "fp32_gemm", "kind": "function", "doc": "

GEMM with bf16/fp16 inputs and float32 output,\ncurrently used for the MoE router GEMM.

    \n\n
    Arguments:
    \n\n
      \n
    • input: bf16/fp16 activation tensor
    • \n
    • weight: bf16/fp16 weight tensor
    • \n
    \n\n
    Returns:
    \n\n
    \n

    output of gemm

    \n
    \n", "signature": "(input: torch.Tensor, weight: torch.Tensor):", "funcdef": "def"}, {"fullname": "linghe.facade.hadamard_quant_linear", "modulename": "linghe.facade.hadamard_quant_linear", "kind": "module", "doc": "

    Copyright (c) Ant Financial Service Group and its affiliates.

    \n"}, {"fullname": "linghe.facade.hadamard_quant_linear.HadamardQuantLinear", "modulename": "linghe.facade.hadamard_quant_linear", "qualname": "HadamardQuantLinear", "kind": "class", "doc": "

    a naive implementation of hadamard transformation and quantization

    \n", "bases": "torch.nn.modules.module.Module"}, {"fullname": "linghe.facade.hadamard_quant_linear.HadamardQuantLinear.__init__", "modulename": "linghe.facade.hadamard_quant_linear", "qualname": "HadamardQuantLinear.__init__", "kind": "function", "doc": "
    Arguments:
    \n\n
      \n
    • in_features: in feature number
    • \n
    • out_features: out feature number
    • \n
• bias: whether to use bias
    • \n
    • device: weight device
    • \n
    • dtype: weight dtype
    • \n
    \n", "signature": "(\tin_features: int,\tout_features: int,\tbias: bool = True,\tdevice=None,\tdtype=None)"}, {"fullname": "linghe.facade.loss", "modulename": "linghe.facade.loss", "kind": "module", "doc": "

    Copyright (c) Ant Financial Service Group and its affiliates.

    \n"}, {"fullname": "linghe.facade.loss.softmax_cross_entropy", "modulename": "linghe.facade.loss", "qualname": "softmax_cross_entropy", "kind": "function", "doc": "

    softmax cross entropy

    \n\n
    Arguments:
    \n\n
      \n
    • logits: logits tensor, shape [...,dim]
    • \n
    • labels: labels tensor, shape [...]
    • \n
    • inplace: update gradient in the logits tensor if True
    • \n
    \n\n
    Returns:
    \n\n
    \n

    a tensor of per token loss

    \n
    \n", "signature": "(logits: torch.Tensor, labels: torch.Tensor, inplace: bool = False):", "funcdef": "def"}, {"fullname": "linghe.facade.norm", "modulename": "linghe.facade.norm", "kind": "module", "doc": "

    Copyright (c) Ant Financial Service Group and its affiliates.

    \n"}, {"fullname": "linghe.facade.norm.rms_norm", "modulename": "linghe.facade.norm", "qualname": "rms_norm", "kind": "function", "doc": "

    rms norm of x with weight

    \n\n
    Arguments:
    \n\n
      \n
    • x: activation tensor
    • \n
    • weight: weight tensor
    • \n
    • eps: epsilon for RMS
    • \n
    \n\n
    Returns:
    \n\n
    \n

    rms output

    \n
    \n", "signature": "(x: torch.Tensor, weight: torch.Tensor, eps: float = 1e-06):", "funcdef": "def"}, {"fullname": "linghe.facade.norm.group_norm_gate", "modulename": "linghe.facade.norm", "qualname": "group_norm_gate", "kind": "function", "doc": "

    return group_rms_norm(transpose(attn_output, [0,1]), weight) * sigmoid(gate)

    \n\n
    Arguments:
    \n\n
      \n
    • attn_output: output of core attn, shape [bs, length, n_heads, head_dim]
    • \n
    • gate: gate tensor for attention output, shape [length, bs, dim]
    • \n
    • weight: weight of RMS norm, shape [dim]
    • \n
    • eps: epsilon for RMS
    • \n
    • group_size: group size of group RMS norm
    • \n
    \n\n
    Returns:
    \n\n
    \n

    output with shape [length, bs, dim]

    \n
    \n", "signature": "(\tattn_output: torch.Tensor,\tgate: torch.Tensor,\tweight: torch.Tensor,\teps: float = 1e-06,\tgroup_size: int = 4):", "funcdef": "def"}, {"fullname": "linghe.facade.rope", "modulename": "linghe.facade.rope", "kind": "module", "doc": "

    Copyright (c) Ant Financial Service Group and its affiliates.

    \n"}, {"fullname": "linghe.facade.rope.qk_norm_half_rope", "modulename": "linghe.facade.rope", "qualname": "qk_norm_half_rope", "kind": "function", "doc": "

split qkv into q/k/v, apply qk norm and half rope to q/k, and transpose q/k/v to the flash-attention layout

    \n\n
    Arguments:
    \n\n
      \n
    • qkv: QKV tensor with size of [S, B, dim], heads are interleaved
    • \n
    • q_norm_weight: rms norm weight for query
    • \n
    • k_norm_weight: rms norm weight for key
    • \n
    • freqs: Freqs tensor based on half dim.
    • \n
    • H: Number of attention heads.
    • \n
    • h: Number of key/value heads.
    • \n
    • eps: epsilon value for L2 normalization.
    • \n
    \n\n
    Returns:
    \n\n
    \n
      \n
    • qo: shape [B, S, H, head_dim]
    • \n
    • ko: shape [B, S, h, head_dim]
    • \n
    • vo: shape [B, S, h, head_dim]
    • \n
    \n
    \n", "signature": "(\tqkv: torch.Tensor,\tq_norm_weight: torch.Tensor,\tk_norm_weight: torch.Tensor,\tfreqs: torch.Tensor,\tH: int = 32,\th: int = 4,\teps: float = 1e-06):", "funcdef": "def"}, {"fullname": "linghe.facade.smooth_quant_linear", "modulename": "linghe.facade.smooth_quant_linear", "kind": "module", "doc": "

    Copyright (c) Ant Financial Service Group and its affiliates.

    \n"}, {"fullname": "linghe.facade.smooth_quant_linear.SmoothQuantLinear", "modulename": "linghe.facade.smooth_quant_linear", "qualname": "SmoothQuantLinear", "kind": "class", "doc": "

a naive implementation of a smooth-quantization linear layer

    \n", "bases": "torch.nn.modules.module.Module"}, {"fullname": "linghe.facade.smooth_quant_linear.SmoothQuantLinear.__init__", "modulename": "linghe.facade.smooth_quant_linear", "qualname": "SmoothQuantLinear.__init__", "kind": "function", "doc": "
    Arguments:
    \n\n
      \n
    • in_features: in feature number
    • \n
    • out_features: out feature number
    • \n
• bias: whether to use bias
    • \n
    • device: weight device
    • \n
    • dtype: weight dtype
    • \n
    \n", "signature": "(\tin_features: int,\tout_features: int,\tbias: bool = True,\tdevice=None,\tdtype=None)"}, {"fullname": "linghe.facade.transpose", "modulename": "linghe.facade.transpose", "kind": "module", "doc": "

    Copyright (c) Ant Financial Service Group and its affiliates.

    \n"}, {"fullname": "linghe.facade.transpose.transpose_dim01", "modulename": "linghe.facade.transpose", "qualname": "transpose_dim01", "kind": "function", "doc": "

transpose the first two dims of a tensor; x.ndim should not be greater than 4

    \n\n
    Arguments:
    \n\n
      \n
    • x: input tensor
    • \n
    \n\n
    Returns:
    \n\n
    \n

    a transposed tensor

    \n
    \n", "signature": "(x):", "funcdef": "def"}, {"fullname": "linghe.gemm", "modulename": "linghe.gemm", "kind": "module", "doc": "

    \n"}, {"fullname": "linghe.gemm.blockwise_fp8_gemm", "modulename": "linghe.gemm.blockwise_fp8_gemm", "kind": "module", "doc": "

    Copyright (c) Ant Financial Service Group and its affiliates.

    \n"}, {"fullname": "linghe.gemm.channelwise_fp8_gemm", "modulename": "linghe.gemm.channelwise_fp8_gemm", "kind": "module", "doc": "

    Copyright (c) Ant Financial Service Group and its affiliates.

    \n"}, {"fullname": "linghe.gemm.channelwise_fp8_gemm.triton_scaled_mm", "modulename": "linghe.gemm.channelwise_fp8_gemm", "qualname": "triton_scaled_mm", "kind": "function", "doc": "

similar to torch._scaled_mm; supports accumulating the GEMM output into c\n and low-precision output tensors

    \n\n
    Arguments:
    \n\n
      \n
    • a: left fp8 tensor
    • \n
    • b: right fp8 tensor, column-major
    • \n
    • a_scale: fp32 scale of a
    • \n
    • b_scale: fp32 scale of b
    • \n
    • out_dtype: output tensor dtype
    • \n
    • c: output tensor
    • \n
    • accum: accumulate output on c if True
    • \n
    \n\n
    Returns:
    \n\n
    \n

    c: output tensor

    \n
    \n", "signature": "(\ta: torch.Tensor,\tb: torch.Tensor,\ta_scale: torch.Tensor,\tb_scale: torch.Tensor,\tout_dtype=torch.float32,\tc=None,\taccum=True):", "funcdef": "def"}, {"fullname": "linghe.gemm.fp32_gemm", "modulename": "linghe.gemm.fp32_gemm", "kind": "module", "doc": "

    Copyright (c) Ant Financial Service Group and its affiliates.

    \n"}, {"fullname": "linghe.gemm.fp32_gemm.triton_fp32_gemm", "modulename": "linghe.gemm.fp32_gemm", "qualname": "triton_fp32_gemm", "kind": "function", "doc": "

return the fp32 GEMM result of fp16/bf16 inputs;\n it is mainly used for the MoE router GEMM\n and is NOT suitable for large GEMMs

    \n\n
    Arguments:
    \n\n
      \n
    • a: left matrix with fp16/bf16 precision
    • \n
    • b: right matrix with fp16/bf16 precision
    • \n
    \n\n
    Returns:
    \n\n
    \n

    c: output with fp32 precision

    \n
    \n", "signature": "(a: torch.Tensor, b: torch.Tensor):", "funcdef": "def"}, {"fullname": "linghe.gemm.fp32_gemm.triton_fp32_gemm_for_backward", "modulename": "linghe.gemm.fp32_gemm", "qualname": "triton_fp32_gemm_for_backward", "kind": "function", "doc": "

    mixed precision gemm for backward, a@b.float()

    \n\n
    Arguments:
    \n\n
      \n
    • a: input gradient, fp32
    • \n
    • b: gemm weight, bf16/fp16
    • \n
    \n\n
    Returns:
    \n\n
    \n

    c: gradient of activation

    \n
    \n", "signature": "(a: torch.Tensor, b: torch.Tensor):", "funcdef": "def"}, {"fullname": "linghe.gemm.fp32_gemm.triton_fp32_gemm_for_update", "modulename": "linghe.gemm.fp32_gemm", "qualname": "triton_fp32_gemm_for_update", "kind": "function", "doc": "

    mixed precision gemm for updating weight

    \n\n
    Arguments:
    \n\n
      \n
    • a: gradient of output, fp32
    • \n
    • b: input activation, bf16/fp16
    • \n
    \n\n
    Returns:
    \n\n
    \n

    c: gradient of weight

    \n
    \n", "signature": "(a: torch.Tensor, b: torch.Tensor):", "funcdef": "def"}, {"fullname": "linghe.gemm.fp32_gemm.triton_scaled_fp32_gemm", "modulename": "linghe.gemm.fp32_gemm", "qualname": "triton_scaled_fp32_gemm", "kind": "function", "doc": "

    c = (a * scale[:, None]) @ b\nthis kernel is used to fuse RMSNorm and quantization in the MoE layer\nnative implementation:\n y = rms_norm(x),\n y_q = quantization(y),\n router_logits = y@w\nwe cannot fuse rms_norm and quantization directly\nas bf16 y is still needed for the MoE router gemm\nfused implementation:\n y_q, rms = quantization(rms_norm(x))\n router_logits = (x/rms)@y\nso a scaled fp32 gemm kernel is needed

    \n\n
    Arguments:
    \n\n
      \n
    • a: activation tensor
    • \n
    • b: weight tensor
    • \n
    • scale: scale for activation tensor, 1/rms
    • \n
    \n\n
    Returns:
    \n\n
    \n

    output tensor

    \n
    \n", "signature": "(a: torch.Tensor, b: torch.Tensor, scale: torch.Tensor):", "funcdef": "def"}, {"fullname": "linghe.gemm.fp32_gemm.triton_scaled_fp32_gemm_for_update", "modulename": "linghe.gemm.fp32_gemm", "qualname": "triton_scaled_fp32_gemm_for_update", "kind": "function", "doc": "

    see triton_scaled_fp32_gemm

    \n\n
    Arguments:
    \n\n
      \n
    • a: y
    • \n
    • b: activation before RMS norm
    • \n
    • scale: 1/rms
    • \n
    \n\n
    Returns:
    \n\n
    \n

    dw

    \n
    \n", "signature": "(a: torch.Tensor, b: torch.Tensor, scale: torch.Tensor):", "funcdef": "def"}, {"fullname": "linghe.quant", "modulename": "linghe.quant", "kind": "module", "doc": "

    \n"}, {"fullname": "linghe.quant.block", "modulename": "linghe.quant.block", "kind": "module", "doc": "

    Copyright (c) Ant Financial Service Group and its affiliates.

    \n"}, {"fullname": "linghe.quant.block.triton_block_quant", "modulename": "linghe.quant.block", "qualname": "triton_block_quant", "kind": "function", "doc": "

    blockwise quantize x

    \n\n
    Arguments:
    \n\n
      \n
    • x: input tensor
    • \n
    • block_size: block size
    • \n
    • round_scale: whether round scale to power of 2
    • \n
    \n\n
    Returns:
    \n\n
    \n
      \n
    • y: quantized tensor, float8_e4m3fn
    • \n
    • s: quantization scale, float32
    • \n
    \n
    \n", "signature": "(x, block_size=128, round_scale=False):", "funcdef": "def"}, {"fullname": "linghe.quant.channel", "modulename": "linghe.quant.channel", "kind": "module", "doc": "

    Copyright (c) Ant Financial Service Group and its affiliates.

    \n"}, {"fullname": "linghe.quant.channel.triton_row_quant", "modulename": "linghe.quant.channel", "qualname": "triton_row_quant", "kind": "function", "doc": "

    rowwise quantize x

    \n\n
    Arguments:
    \n\n
      \n
    • x: input x
    • \n
    • round_scale: whether round scale to power of 2
    • \n
    \n\n
    Returns:
    \n\n
    \n

    x_q: quantized tensor\n x_scale: quantization scale

    \n
    \n", "signature": "(x, round_scale=False):", "funcdef": "def"}, {"fullname": "linghe.quant.channel.triton_tokenwise_row_quant", "modulename": "linghe.quant.channel", "qualname": "triton_tokenwise_row_quant", "kind": "function", "doc": "

    rowwise quantize x with a power-of-2 dim size

    \n\n
    Arguments:
    \n\n
      \n
    • x: input x
    • \n
    • round_scale: whether round scale to power of 2
    • \n
    \n\n
    Returns:
    \n\n
    \n

    out: quantized tensor\n scale: quantization scale

    \n
    \n", "signature": "(x, out=None, scale=None, round_scale=False):", "funcdef": "def"}, {"fullname": "linghe.quant.channel.triton_transpose_row_quant", "modulename": "linghe.quant.channel", "qualname": "triton_transpose_row_quant", "kind": "function", "doc": "

    transpose x and row quantize x

    \n\n
    Arguments:
    \n\n
      \n
    • x: input x
    • \n
    • round_scale: whether round scale to power of 2
    • \n
    \n\n
    Returns:
    \n\n
    \n
      \n
    • x_q: quantized tensor
    • \n
    • x_scale: quantization scale
    • \n
    \n
    \n", "signature": "(x, round_scale=False):", "funcdef": "def"}, {"fullname": "linghe.quant.group", "modulename": "linghe.quant.group", "kind": "module", "doc": "

    Copyright (c) Ant Financial Service Group and its affiliates.

    \n"}, {"fullname": "linghe.quant.group.triton_group_quant", "modulename": "linghe.quant.group", "qualname": "triton_group_quant", "kind": "function", "doc": "

    groupwise quantize x; groups are in rowwise format

    \n\n
    Arguments:
    \n\n
      \n
    • x: input tensor
    • \n
    • group_size: group size
    • \n
    • round_scale: whether round scale to power of 2
    • \n
    \n\n
    Returns:
    \n\n
    \n
      \n
    • y: quantized tensor, float8_e4m3fn
    • \n
    • s: quantization scale, float32
    • \n
    \n
    \n", "signature": "(x, dtype=torch.float8_e4m3fn, group_size=128, round_scale=False):", "funcdef": "def"}, {"fullname": "linghe.quant.hadamard", "modulename": "linghe.quant.hadamard", "kind": "module", "doc": "

    Copyright (c) Ant Financial Service Group and its affiliates.

    \n"}, {"fullname": "linghe.quant.hadamard.triton_hadamard_quant", "modulename": "linghe.quant.hadamard", "qualname": "triton_hadamard_quant", "kind": "function", "doc": "

    apply hadamard transformation and then quantize transformed tensor

    \n\n
    Arguments:
    \n\n
      \n
    • x: input tensor
    • \n
    • hm: hadamard matrix
    • \n
    \n\n
    Returns:
    \n\n
    \n
      \n
    • x_q: rowwise quantized tensor of non-transposed x
    • \n
    • x_scale: rowwise quantization scale of non-transposed x
    • \n
    • xt_q: columnwise quantized tensor of transposed x
    • \n
    • xt_scale: columnwise quantization scale of transposed x
    • \n
    \n
    \n", "signature": "(x, hm):", "funcdef": "def"}, {"fullname": "linghe.quant.smooth", "modulename": "linghe.quant.smooth", "kind": "module", "doc": "

    Copyright (c) Ant Financial Service Group and its affiliates.

    \n"}, {"fullname": "linghe.utils", "modulename": "linghe.utils", "kind": "module", "doc": "

    \n"}, {"fullname": "linghe.utils.add", "modulename": "linghe.utils.add", "kind": "module", "doc": "

    Copyright (c) Ant Financial Service Group and its affiliates.

    \n"}, {"fullname": "linghe.utils.add.triton_inplace_add", "modulename": "linghe.utils.add", "qualname": "triton_inplace_add", "kind": "function", "doc": "

    inplace add y to x

    \n\n
    Arguments:
    \n\n
      \n
    • x: Tensor
    • \n
    • y: Tensor
    • \n
    • accum: x += y if accum=True else x.copy_(y)
    • \n
    \n\n
    Returns:
    \n\n
    \n

    updated x

    \n
    \n", "signature": "(x: torch.Tensor, y: torch.Tensor, accum: bool = True):", "funcdef": "def"}, {"fullname": "linghe.utils.dot", "modulename": "linghe.utils.dot", "kind": "module", "doc": "

    Copyright (c) Ant Financial Service Group and its affiliates.

    \n"}, {"fullname": "linghe.utils.dot.triton_dot", "modulename": "linghe.utils.dot", "qualname": "triton_dot", "kind": "function", "doc": "

    vector dot product, output = sum(x*y, 1),\nit is used to calculate the gradient of the router weight

    \n\n
    Arguments:
    \n\n
      \n
    • x:
    • \n
    • y:
    • \n
    \n\n
    Returns:
    \n\n
    \n

    output of sum(x*y, 1)

    \n
    \n", "signature": "(x, y):", "funcdef": "def"}, {"fullname": "linghe.utils.gather", "modulename": "linghe.utils.gather", "kind": "module", "doc": "

    Copyright (c) Ant Financial Service Group and its affiliates.

    \n"}, {"fullname": "linghe.utils.gather.triton_make_row_id_map", "modulename": "linghe.utils.gather", "qualname": "triton_make_row_id_map", "kind": "function", "doc": "

    make row id map, values in the tensor are the row indices

    \n\n
    Arguments:
    \n\n
      \n
    • routing_map: a tensor of 0/1 values, 1 indicates routed
    • \n
    • multiple_of: pad the tokens of each expert to a multiple of this value
    • \n
    \n\n
    Returns:
    \n\n
    \n

    row id map with shape [n_tokens, n_experts]

    \n
    \n", "signature": "(routing_map: torch.Tensor, multiple_of: int = 1):", "funcdef": "def"}, {"fullname": "linghe.utils.gather.triton_make_row_id_map_and_indices", "modulename": "linghe.utils.gather", "qualname": "triton_make_row_id_map_and_indices", "kind": "function", "doc": "

    similar to triton_make_row_id_map, but outputs an indices tensor as well

    \n\n
    Arguments:
    \n\n
      \n
    • routing_map: [n_tokens, n_experts]
    • \n
    • num_out_tokens: sum(round_up_to(n_tokens, multiple_of))
    • \n
    • multiple_of: pad the tokens of each expert to a multiple of this value
    • \n
    \n\n
    Returns:
    \n\n
    \n

    row_id_map: [n_tokens, n_experts]\n row_indices: [num_out_tokens]

    \n
    \n", "signature": "(routing_map: torch.Tensor, num_out_tokens: int, multiple_of: int = 1):", "funcdef": "def"}, {"fullname": "linghe.utils.gather.triton_index_select", "modulename": "linghe.utils.gather", "qualname": "triton_index_select", "kind": "function", "doc": "

    index select for quantized tensor

    \n\n
    Arguments:
    \n\n
      \n
    • x: [bs, dim]
    • \n
    • indices: [K]
    • \n
    • scale: [bs]
    • \n
    \n\n
    Returns:
    \n\n
    \n

    out: output of selected x\n scale_out: scale of selected scale

    \n
    \n", "signature": "(x, indices, scale=None, out=None, scale_out=None):", "funcdef": "def"}, {"fullname": "linghe.utils.gather.triton_permute_with_mask_map", "modulename": "linghe.utils.gather", "qualname": "triton_permute_with_mask_map", "kind": "function", "doc": "

    gather quantized tensor with row id map

    \n\n
    Arguments:
    \n\n
      \n
    • inp: [num_tokens, hidden_size], rowwise quantized tensor
    • \n
    • scale: [num_tokens], quantization scale
    • \n
    • probs: router prob, used as weight
    • \n
    • row_id_map: [n_experts, num_tokens]\nindex >= 0: row index of output tensor\nindex == -1: ignore\nNote: index may not be contiguous
    • \n
    • num_out_tokens: output token count, including padding tokens
    • \n
    • contiguous: whether indices in row_id_map is contiguous,\nFalse means padded
    • \n
    • tokens_per_expert: [num_experts], token count per expert,\nnon-blocking cuda tensor
    • \n
    \n\n
    Returns:
    \n\n
    \n

    output: permuted quantized tensor\n permuted_scale: permuted quantization scale\n permuted_probs: permuted router prob

    \n
    \n", "signature": "(\tinp: torch.Tensor,\tscale: torch.Tensor,\tprobs: torch.Tensor,\trow_id_map: torch.Tensor,\tnum_out_tokens: int,\tcontiguous: bool = True,\ttokens_per_expert: Optional[torch.Tensor] = None):", "funcdef": "def"}, {"fullname": "linghe.utils.gather.triton_batch_transpose_smooth_permute_with_indices", "modulename": "linghe.utils.gather", "qualname": "triton_batch_transpose_smooth_permute_with_indices", "kind": "function", "doc": "

    used for smooth quantization backward in megatron 0.12,\nx is gathered, requantized, padded to a multiple of 32 and transposed

    \n\n
    Arguments:
    \n\n
      \n
    • x: dy, [bs, dim], it is smooth quantized
    • \n
    • scale: [bs], quantized scale
    • \n
    • org_smooth_scale: [dim]
    • \n
    • smooth_scales: [n_experts, dim]
    • \n
    • indices: [sum(tokens_per_experts)]
    • \n
    • token_count_per_expert: [n_experts], tensor of token count per expert
    • \n
    • splits: [n_experts], list of token_count_per_expert
    • \n
    • round_scale: round quantization scale to power of 2
    • \n
    \n\n
    Returns:
    \n\n
    \n

    x_q: [sum(roundup(tokens_per_experts)) * dim]\n x_scale: [sum(roundup(tokens_per_experts))]

    \n
    \n", "signature": "(\tx,\tscale,\torg_smooth_scale,\tsmooth_scales,\tindices,\ttoken_count_per_expert,\tsplits,\tx_q=None,\tx_scale=None,\tround_scale=False):", "funcdef": "def"}, {"fullname": "linghe.utils.gather.triton_smooth_weighted_permute_with_indices", "modulename": "linghe.utils.gather", "qualname": "triton_smooth_weighted_permute_with_indices", "kind": "function", "doc": "

    select, smooth, and quantize; used in megatron 0.11 all2all MoE

    \n\n
    Arguments:
    \n\n
      \n
    • grads: [bs, dim]
    • \n
    • tokens: [bs, dim]
    • \n
    • smooth_scales: [n_experts, dim]
    • \n
    • token_count_per_expert: [n_experts]
    • \n
    • indices: [n_experts*topk]
    • \n
    • reverse: whether scale is 1/scale
    • \n
    • round_scale: whether round scale to power of 2
    • \n
    \n\n
    Returns:
    \n\n
    \n

    x_q: [bs*topk, dim]\n x_scale: [bs*topk]\n x_sum: [bs*topk]

    \n
    \n", "signature": "(\tgrads,\ttokens,\tsmooth_scales,\ttoken_count_per_expert,\tindices,\tx_q=None,\tx_scale=None,\tx_sum=None,\treverse=False,\tround_scale=False):", "funcdef": "def"}, {"fullname": "linghe.utils.gather.triton_smooth_permute_with_indices", "modulename": "linghe.utils.gather", "qualname": "triton_smooth_permute_with_indices", "kind": "function", "doc": "

    select, smooth, and quantize

    \n\n
    Arguments:
    \n\n
      \n
    • grad_data: [bs, dim]
    • \n
    • grad_scale: [bs]
    • \n
    • smooth_scales: [n_experts, dim]
    • \n
    • token_count_per_expert: [n_experts]
    • \n
    • indices: [n_experts*topk]
    • \n
    • x_q: [bs*topk, dim]
    • \n
    • x_scale: [bs*topk]
    • \n
    • reverse:
    • \n
    • round_scale:
    • \n
    \n\n

    Returns:

    \n", "signature": "(\tgrad_data,\tgrad_scale,\tsmooth_scales,\ttoken_count_per_expert,\tindices,\tx_q=None,\tx_scale=None,\treverse=False,\tround_scale=False):", "funcdef": "def"}, {"fullname": "linghe.utils.gather.triton_smooth_permute_with_mask_map", "modulename": "linghe.utils.gather", "qualname": "triton_smooth_permute_with_mask_map", "kind": "function", "doc": "

    gather (and optionally dequantize) and smooth quantize

    \n\n
    Arguments:
    \n\n
      \n
    • inp: [num_tokens, hidden_size], rowwise quantized tensor
    • \n
    • row_id_map: [n_experts, num_tokens], indices
    • \n
    • scale: [num_tokens, hs], rowwise_scale_inv, optional
    • \n
    • num_tokens: [n_experts]
    • \n
    • num_experts:
    • \n
    • num_out_tokens:
    • \n
    • hidden_size:
    • \n
    • smooth_scales: [n_experts, hidden_size]
    • \n
    • reverse:
    • \n
    • round_scale:
    • \n
    \n\n
    Returns:
    \n\n
    \n
      \n
    • output: output tensor
    • \n
    • permuted_scale: permuted scale if scale is not None
    • \n
    \n
    \n", "signature": "(\tinp: torch.Tensor,\trow_id_map: torch.Tensor,\tscale: torch.Tensor,\tnum_tokens: int,\tnum_experts: int,\tnum_out_tokens: int,\thidden_size: int,\tsmooth_scales: torch.Tensor,\treverse=True,\tround_scale=False):", "funcdef": "def"}, {"fullname": "linghe.utils.loss", "modulename": "linghe.utils.loss", "kind": "module", "doc": "

    Copyright (c) Ant Financial Service Group and its affiliates.

    \n"}, {"fullname": "linghe.utils.loss.triton_softmax_cross_entropy_forward", "modulename": "linghe.utils.loss", "qualname": "triton_softmax_cross_entropy_forward", "kind": "function", "doc": "

    compute token-wise softmax cross entropy loss

    \n\n
    Arguments:
    \n\n
      \n
    • logits: logits tensor
    • \n
    • labels: labels tensor
    • \n
    \n\n
    Returns:
    \n\n
    \n

    loss of each token

    \n
    \n", "signature": "(logits, labels):", "funcdef": "def"}, {"fullname": "linghe.utils.loss.triton_softmax_cross_entropy_backward", "modulename": "linghe.utils.loss", "qualname": "triton_softmax_cross_entropy_backward", "kind": "function", "doc": "

    backward of softmax cross entropy loss

    \n\n
    Arguments:
    \n\n
      \n
    • logits: logit tensor, [bs, dim]
    • \n
    • labels: label tensor, [bs]
    • \n
    • sum_exp: [bs]
    • \n
    • max_logit: [bs]
    • \n
    • input_grad: gradient, [bs, dim]
    • \n
    \n\n
    Returns:
    \n\n
    \n

    output_grad: [bs, dim]

    \n
    \n", "signature": "(logits, labels, sum_exp, max_logit, input_grad, output_grad=None):", "funcdef": "def"}, {"fullname": "linghe.utils.norm", "modulename": "linghe.utils.norm", "kind": "module", "doc": "

    \n"}, {"fullname": "linghe.utils.norm.triton_rms_norm_forward", "modulename": "linghe.utils.norm", "qualname": "triton_rms_norm_forward", "kind": "function", "doc": "

    rms norm

    \n\n
    Arguments:
    \n\n
      \n
    • x: input tensor
    • \n
    • weight: weight of rms norm
    • \n
    • eps: epsilon of rms norm
    • \n
    \n\n
    Returns:
    \n\n
    \n

    out: output tensor

    \n
    \n", "signature": "(x, weight, eps=1e-06, out=None):", "funcdef": "def"}, {"fullname": "linghe.utils.norm.triton_rms_norm_and_block_quant_forward", "modulename": "linghe.utils.norm", "qualname": "triton_rms_norm_and_block_quant_forward", "kind": "function", "doc": "

    Fused RMSNorm forward and block quantization.

    \n\n
    Arguments:
    \n\n
      \n
    • x: Input tensor, shape [M, N]
    • \n
    • weight: RMSNorm weight, shape [N]
    • \n
    • eps: epsilon value for L2 normalization.
    • \n
    • out: output of quantization data
    • \n
    • scale: output of quantization scale.
    • \n
    • rms: output of rms
    • \n
    • round_scale: Set whether to force power of 2 scales.
    • \n
    • output_mode: one of {0, 1, 2}.\n0: only output non-transpose tensor\n1: only output transposed tensor\n2: return both
    • \n
    \n\n
    Returns:
    \n\n
    \n
      \n
    • out: quantization data.
    • \n
    • scale: quantization scale.
    • \n
    • rms: Reciprocal of the root mean square of the\n input calculated over the last dimension.
    • \n
    • transpose_output: quantization data of transposed gradient.
    • \n
    • transpose_scale: quantization scale of transposed gradient.
    • \n
    \n
    \n", "signature": "(\tx: torch.Tensor,\tweight: torch.Tensor,\teps: float = 1e-06,\tout: Optional[torch.Tensor] = None,\tscale: Optional[torch.Tensor] = None,\trms: Optional[torch.Tensor] = None,\tround_scale: bool = False,\toutput_mode: int = 2):", "funcdef": "def"}, {"fullname": "linghe.utils.norm.triton_group_norm_gate_forward", "modulename": "linghe.utils.norm", "qualname": "triton_group_norm_gate_forward", "kind": "function", "doc": "

    norm and gate in linear attention

    \n\n
    Arguments:
    \n\n
      \n
    • x: output of attn, [bs, length, n_heads, head_dim]
    • \n
    • gate: gate tensor, [length, bs, dim]
    • \n
    • weight: rms norm weight, [dim]
    • \n
    • eps: epsilon of rms norm
    • \n
    • group_size: group size of group rms norm
    • \n
    \n\n
    Returns:
    \n\n
    \n

    output tensor

    \n
    \n", "signature": "(x: torch.Tensor, gate, weight, eps=1e-06, group_size=4):", "funcdef": "def"}, {"fullname": "linghe.utils.rearange", "modulename": "linghe.utils.rearange", "kind": "module", "doc": "

    Copyright (c) Ant Financial Service Group and its affiliates.

    \n"}, {"fullname": "linghe.utils.rearange.triton_split_and_cat", "modulename": "linghe.utils.rearange", "qualname": "triton_split_and_cat", "kind": "function", "doc": "

    split x into multiple tensors and concatenate with indices,\nit is used for permutation in MoE

    \n\n
    Arguments:
    \n\n
      \n
    • x: [bs, dim]
    • \n
    • counts: [n_split]
    • \n
    • indices: [n_split]
    • \n
    • scales: [bs]
    • \n
    \n\n
    Returns:
    \n\n
    \n
      \n
    • y: output tensor
    • \n
    • output_scales: output scales if scales is not None
    • \n
    \n
    \n", "signature": "(x, counts, indices, scales=None):", "funcdef": "def"}, {"fullname": "linghe.utils.reduce", "modulename": "linghe.utils.reduce", "kind": "module", "doc": "

    Copyright (c) Ant Financial Service Group and its affiliates.

    \n"}, {"fullname": "linghe.utils.reduce.triton_abs_max", "modulename": "linghe.utils.reduce", "qualname": "triton_abs_max", "kind": "function", "doc": "

    columnwise abs max of x, it is used in smooth quantization

    \n\n
    Arguments:
    \n\n
      \n
    • x: input tensor, may be quantized tensor
    • \n
    • scale: quantization scale if x is quantized
    • \n
    • smooth_scale: optional smooth scale
    • \n
    • min_value: lower bound of the output, i.e., output = max(max(abs(x), axis), min_value)
    • \n
    • axis: reduce axis
    • \n
    \n\n
    Returns:
    \n\n
    \n

    max tensor

    \n
    \n", "signature": "(x, scale=None, smooth_scale=None, min_value=1e-30, axis=0):", "funcdef": "def"}, {"fullname": "linghe.utils.reduce.triton_batch_count_zero", "modulename": "linghe.utils.reduce", "qualname": "triton_batch_count_zero", "kind": "function", "doc": "

    count zeros in a tensor list; it is used to monitor zeros in gradient tensors

    \n\n
    Arguments:
    \n\n
      \n
    • xs: input tensors
    • \n
    \n\n
    Returns:
    \n\n
    \n

    a single-value int64 tensor

    \n
    \n", "signature": "(xs):", "funcdef": "def"}, {"fullname": "linghe.utils.reduce.triton_batch_sum_with_ord", "modulename": "linghe.utils.reduce", "qualname": "triton_batch_sum_with_ord", "kind": "function", "doc": "

    return sum(abs(x)**ord).

    \n\n
    Arguments:
    \n\n
      \n
    • xs: Tensor lists.
    • \n
    • ord: the order of the norm, i.e., the exponent applied to abs(x).
    • \n
    \n\n
    Returns:
    \n\n
    \n

    a single-value fp32 tensor

    \n
    \n", "signature": "(xs, ord=2):", "funcdef": "def"}, {"fullname": "linghe.utils.rope", "modulename": "linghe.utils.rope", "kind": "module", "doc": "

    Copyright (c) Ant Financial Service Group and its affiliates.

    \n"}, {"fullname": "linghe.utils.rope.triton_half_rope_forward", "modulename": "linghe.utils.rope", "qualname": "triton_half_rope_forward", "kind": "function", "doc": "

    apply norm to qk, then apply half rope to qk

    \n\n
    Arguments:
    \n\n
      \n
    • q: query tensor, [len, bs, q_head, head_dim]
    • \n
    • k: key tensor, [len, bs, kv_head, head_dim]
    • \n
    • freqs: rope freqs
    • \n
    \n\n
    Returns:
    \n\n
    \n
      \n
    • qo: query output
    • \n
    • ko: key output
    • \n
    \n
    \n", "signature": "(q, k, freqs):", "funcdef": "def"}, {"fullname": "linghe.utils.rope.triton_qk_norm_and_half_rope_forward", "modulename": "linghe.utils.rope", "qualname": "triton_qk_norm_and_half_rope_forward", "kind": "function", "doc": "

    split qkv to q/k/v, apply qk norm and half rope to q/k,\n transpose q/k/v to flash-attention layout

    \n\n
    Arguments:
    \n\n
      \n
    • qkv: QKV tensor with size of [S, B, dim], heads are interleaved
    • \n
    • q_norm_weight: rms norm weight for query
    • \n
    • k_norm_weight: rms norm weight for key
    • \n
    • freqs: Freqs tensor based on half dim.
    • \n
    • H: Number of attention heads.
    • \n
    • h: Number of key/value heads.
    • \n
    • eps: epsilon value for L2 normalization.
    • \n
    • interleave: whether head of qkv is interleaved, i.e., [qqkvqqkv]
    • \n
    • transpose: whether qkv is transposed, i.e., [S, B, dim],\nonly the transposed format is currently supported
    • \n
    \n\n
    Returns:
    \n\n
    \n
      \n
    • qo: shape [B, S, H, head_dim]
    • \n
    • ko: shape [B, S, h, head_dim]
    • \n
    • vo: shape [B, S, h, head_dim]
    • \n
    \n
    \n", "signature": "(\tqkv,\tq_norm_weight,\tk_norm_weight,\tfreqs,\tH=32,\th=4,\teps=1e-06,\tinterleave=True,\ttranspose=False):", "funcdef": "def"}, {"fullname": "linghe.utils.rope.triton_qk_norm_and_half_rope_backward", "modulename": "linghe.utils.rope", "qualname": "triton_qk_norm_and_half_rope_backward", "kind": "function", "doc": "

    backward kernel of triton_qk_norm_and_half_rope_forward

    \n\n
    Arguments:
    \n\n
      \n
    • gq: gradient of qo, [len, bs, q_head, head_dim]
    • \n
    • gk: gradient of ko, [len, bs, q_head, head_dim]
    • \n
    • gv: gradient of vo, [len, bs, q_head, head_dim]
    • \n
    • qkv: input qkv
    • \n
    • q_norm_weight:
    • \n
    • k_norm_weight:
    • \n
    • freqs:
    • \n
    • eps:
    • \n
    • transpose:
    • \n
    • interleave:
    • \n
    \n\n
    Returns:
    \n\n
    \n
      \n
    • dqkv: gradient of qkv
    • \n
    • dqw: gradient of q_norm_weight
    • \n
    • dkw: gradient of k_norm_weight
    • \n
    \n
    \n", "signature": "(\tgq,\tgk,\tgv,\tqkv,\tq_norm_weight,\tk_norm_weight,\tfreqs,\teps=1e-06,\ttranspose=False,\tinterleave=True):", "funcdef": "def"}, {"fullname": "linghe.utils.scatter", "modulename": "linghe.utils.scatter", "kind": "module", "doc": "

    Copyright (c) Ant Financial Service Group and its affiliates.

    \n"}, {"fullname": "linghe.utils.scatter.triton_aligned_scatter_add", "modulename": "linghe.utils.scatter", "qualname": "triton_aligned_scatter_add", "kind": "function", "doc": "

    scatter_add for megatron 0.11

    \n\n
    Arguments:
    \n\n
      \n
    • x: input tensor
    • \n
    • outputs: output tensor
    • \n
    • indices: gather indices
    • \n
    • weights: rowwise weight, it is router prob in MoE router
    • \n
    \n\n
    Returns:
    \n\n
    \n

    output tensor

    \n
    \n", "signature": "(\tx: torch.Tensor,\toutputs: torch.Tensor,\tindices: torch.Tensor,\tweights: Optional[torch.Tensor] = None):", "funcdef": "def"}, {"fullname": "linghe.utils.scatter.triton_scatter_add", "modulename": "linghe.utils.scatter", "qualname": "triton_scatter_add", "kind": "function", "doc": "

    naive version of scatter add, very slow

    \n\n
    Arguments:
    \n\n
      \n
    • x: input tensor
    • \n
    • outputs: output tensor
    • \n
    • indices: indices
    • \n
    \n\n
    Returns:
    \n\n
    \n

    output tensor

    \n
    \n", "signature": "(x, outputs, indices):", "funcdef": "def"}, {"fullname": "linghe.utils.scatter.triton_unpermute_with_mask_map", "modulename": "linghe.utils.scatter", "qualname": "triton_unpermute_with_mask_map", "kind": "function", "doc": "

    scatter add with row id map

    \n\n
    Arguments:
    \n\n
      \n
    • grad: gradient tensor, [num_out_tokens, hidden_size]
    • \n
    • row_id_map: row id map, [n_experts, num_tokens]
    • \n
    • probs: [num_out_tokens]
    • \n
    \n\n
    Returns:
    \n\n
    \n
      \n
    • output: [num_tokens, hidden_size]
    • \n
    • restore_probs: [num_tokens, num_experts]
    • \n
    \n
    \n", "signature": "(grad: torch.Tensor, row_id_map: torch.Tensor, probs: torch.Tensor):", "funcdef": "def"}, {"fullname": "linghe.utils.silu", "modulename": "linghe.utils.silu", "kind": "module", "doc": "

    Copyright (c) Ant Financial Service Group and its affiliates.

    \n"}, {"fullname": "linghe.utils.silu.triton_weighted_silu_forward", "modulename": "linghe.utils.silu", "qualname": "triton_weighted_silu_forward", "kind": "function", "doc": "

    compute silu(x)*weight, used in bf16/fp16 training with MoE

    \n\n
    Arguments:
    \n\n
      \n
    • x: input tensor
    • \n
    • weight: tokenwise weight
    • \n
    \n\n
    Returns:
    \n\n
    \n

    out: output tensor

    \n
    \n", "signature": "(x, weight=None, out=None):", "funcdef": "def"}, {"fullname": "linghe.utils.silu.triton_weighted_silu_backward", "modulename": "linghe.utils.silu", "qualname": "triton_weighted_silu_backward", "kind": "function", "doc": "

    backward of triton_weighted_silu_forward

    \n\n
    Arguments:
    \n\n
      \n
    • g: gradient tensor
    • \n
    • x: input tensor
    • \n
    • weight: weight tensor
    • \n
    \n\n
    Returns:
    \n\n
    \n
      \n
    • dx: gradient of x
    • \n
    • dw: gradient of weight
    • \n
    \n
    \n", "signature": "(\tg: torch.Tensor,\tx: torch.Tensor,\tweight: Optional[torch.Tensor] = None):", "funcdef": "def"}, {"fullname": "linghe.utils.silu.triton_silu_and_block_quant_forward", "modulename": "linghe.utils.silu", "qualname": "triton_silu_and_block_quant_forward", "kind": "function", "doc": "

    fused silu and blockwise quantization, used in shared expert

    \n\n
    Arguments:
    \n\n
      \n
    • x: input tensor
    • \n
    • round_scale: whether round scale to power of 2
    • \n
    • output_mode: one of {0, 1, 2}\n0: only output non-transposed quantized tensor\n1: only output transposed quantized tensor\n2: output both
    • \n
    \n\n
    Returns:
    \n\n
    \n
      \n
    • out: quantized tensor
    • \n
    • scale: quantization scale
    • \n
    • transpose_output: quantized tensor of transposed output
    • \n
    • transpose_scale: quantization scale of transposed output
    • \n
    \n
    \n", "signature": "(x, out=None, scale=None, round_scale=False, output_mode=2):", "funcdef": "def"}, {"fullname": "linghe.utils.silu.triton_silu_and_block_quant_backward", "modulename": "linghe.utils.silu", "qualname": "triton_silu_and_block_quant_backward", "kind": "function", "doc": "

    backward of triton_silu_and_block_quant_forward

    \n\n
    Arguments:
    \n\n
      \n
    • g: gradient
    • \n
    • x: input tensor
    • \n
    • round_scale: whether round to power of 2
    • \n
    \n\n
    Returns:
    \n\n
    \n
      \n
    • dx: quantized non-transposed gradient
    • \n
    • dx_scale: scales of quantization non-transposed gradient
    • \n
    • transpose_dx: quantized transposed gradient
    • \n
    • transpose_dx_scale: scales of quantization transposed gradient
    • \n
    \n
    \n", "signature": "(g, x, round_scale=False):", "funcdef": "def"}, {"fullname": "linghe.utils.silu.triton_batch_weighted_silu_and_block_quant_forward", "modulename": "linghe.utils.silu", "qualname": "triton_batch_weighted_silu_and_block_quant_forward", "kind": "function", "doc": "

    silu and blockwise quantize activation in routed experts

    \n\n
    Arguments:
    \n\n
      \n
    • x: activation tensor in routed experts
    • \n
    • weight: router prob tensor
    • \n
    • counts: cuda tensor of token count per expert
    • \n
    • splits: python int list of token count per expert
    • \n
    • round_scale: whether round scale to power of 2
    • \n
    • output_mode: one of {0, 1, 2}\n0: only output non-transposed quantized tensor\n1: only output transposed quantized tensor\n2: output both
    • \n
    \n\n
    Returns:
    \n\n
    \n
      \n
    • out: quantized tensor
    • \n
    • scale: quantization scale
    • \n
    • transpose_output: quantized tensor of transposed output
    • \n
    • transpose_scale: quantization scale of transposed output
    • \n
    \n
    \n", "signature": "(\tx,\tweight,\tcounts,\tsplits=None,\tout=None,\tscale=None,\tround_scale=False,\toutput_mode=2):", "funcdef": "def"}, {"fullname": "linghe.utils.silu.triton_batch_weighted_silu_and_block_quant_backward", "modulename": "linghe.utils.silu", "qualname": "triton_batch_weighted_silu_and_block_quant_backward", "kind": "function", "doc": "

    backward of triton_batch_weighted_silu_and_block_quant_forward

    \n\n
    Arguments:
    \n\n
      \n
    • g: gradient
    • \n
    • x: input tensor
    • \n
    • weight: router prob tensor
    • \n
    • counts: cuda tensor of token count per expert
    • \n
    • splits: python int list of token count per expert
    • \n
    • round_scale: whether round scale to power of 2
    • \n
    \n\n
    Returns:
    \n\n
    \n
      \n
    • dx: quantized non-transposed gradient
    • \n
    • dx_scale: scales of quantization non-transposed gradient
    • \n
    • dw: gradient of weight
    • \n
    • transpose_dx: quantized transposed gradient
    • \n
    • transpose_dx_scale: scales of quantization transposed gradient
    • \n
    \n
    \n", "signature": "(g, x, weight, counts, splits=None, round_scale=False):", "funcdef": "def"}, {"fullname": "linghe.utils.transpose", "modulename": "linghe.utils.transpose", "kind": "module", "doc": "

    Copyright (c) Ant Financial Service Group and its affiliates.

    \n"}, {"fullname": "linghe.utils.transpose.triton_transpose", "modulename": "linghe.utils.transpose", "qualname": "triton_transpose", "kind": "function", "doc": "

    transpose x with dim0 and dim1

    \n\n
    Arguments:
    \n\n
      \n
    • x: input tensor
    • \n
    • dim0: dim 0
    • \n
    • dim1: dim 1
    • \n
    \n\n
    Returns:
    \n\n
    \n

    transposed tensor

    \n
    \n", "signature": "(\tx: torch.Tensor,\tdim0: Optional[int] = None,\tdim1: Optional[int] = None):", "funcdef": "def"}, {"fullname": "linghe.utils.transpose.triton_transpose_and_pad", "modulename": "linghe.utils.transpose", "qualname": "triton_transpose_and_pad", "kind": "function", "doc": "

    transpose x and pad the column size to a multiple of 32,\nit is used for calculating the gradient of weight with torch._scaled_mm

    \n\n
    Arguments:
    \n\n
      \n
    • x: input tensor
    • \n
    • out:
    • \n
    • pad: whether need padding
    • \n
    \n\n
    Returns:
    \n\n
    \n

    out: output tensor

    \n
    \n", "signature": "(x, out=None, pad=True):", "funcdef": "def"}, {"fullname": "linghe.utils.transpose.triton_batch_transpose", "modulename": "linghe.utils.transpose", "qualname": "triton_batch_transpose", "kind": "function", "doc": "

    batch transpose x

    \n\n
    Arguments:
    \n\n
      \n
    • xs: input tensor list, [M, N]*expert
    • \n
    \n\n
    Returns:
    \n\n
    \n

    xts: output tensor list, [N,M]*expert

    \n
    \n", "signature": "(xs, xts=None):", "funcdef": "def"}, {"fullname": "linghe.utils.transpose.triton_batch_transpose_and_pad", "modulename": "linghe.utils.transpose", "qualname": "triton_batch_transpose_and_pad", "kind": "function", "doc": "

    transpose and pad each tensor stored in x

    \n\n
    Arguments:
    \n\n
      \n
    • x: [sum(bs), N]
    • \n
    • count_list: a python list of token count
    • \n
    • pad: whether to pad to a multiple of 32,\npadding values should be filled with 0 if padded
    • \n
    \n\n
    Returns:
    \n\n
    \n

    x_t: output tensor

    \n
    \n", "signature": "(x, count_list, x_t=None, pad=True):", "funcdef": "def"}]; // mirrored in build-search-index.js (part 1) // Also split on html tags. this is a cheap heuristic, but good enough. diff --git a/linghe/facade/add.py b/linghe/facade/add.py index c40aca4..cac4306 100644 --- a/linghe/facade/add.py +++ b/linghe/facade/add.py @@ -9,9 +9,7 @@ class InplaceAddFunction(torch.autograd.Function): - """ - - """ + """""" @staticmethod def forward(ctx, x: torch.Tensor, y: torch.Tensor): return triton_inplace_add(x, y) @@ -28,6 +26,6 @@ def inplace_add(x: torch.Tensor, y: torch.Tensor): x: to be updated y: add to x Returns: - return updated x tensor + updated x tensor """ return InplaceAddFunction.apply(x, y) \ No newline at end of file diff --git a/linghe/facade/fp32_gemm.py b/linghe/facade/fp32_gemm.py index 8ab9ae9..2f6ff34 100644 --- a/linghe/facade/fp32_gemm.py +++ b/linghe/facade/fp32_gemm.py @@ -11,9 +11,7 @@ class Fp32GEMM(torch.autograd.Function): - """ - - """ + """""" @staticmethod def forward(ctx, input: torch.Tensor, weight: torch.Tensor): shape = input.shape diff --git a/linghe/facade/hadamard_quant_linear.py b/linghe/facade/hadamard_quant_linear.py index 586f104..5b3dd45 100644 --- a/linghe/facade/hadamard_quant_linear.py +++ b/linghe/facade/hadamard_quant_linear.py @@ -34,8 +34,8 @@ def forward( output = torch._scaled_mm(x_q, w_q.t(), - scale_a=x_scale, - scale_b=w_scale, + scale_a=x_scale.view(-1,1), + scale_b=w_scale.view(1,-1), out_dtype=ctx.out_dtype, use_fast_accum=True ) @@ -61,7 +61,6 @@ def backward( output_grad: torch.Tensor, ): xt_q, xt_scale, wt_q, wt_scale, hadamard_matrix = ctx.saved_tensors - results = [None, None, None, None] output_grad = output_grad.view(-1, output_grad.shape[-1]) @@ -69,32 +68,33 @@ def backward( dx = torch._scaled_mm(y_q, wt_q.t(), - scale_a=y_scale, - scale_b=wt_scale, + scale_a=y_scale.view(-1,1), + scale_b=wt_scale.view(1,-1), out_dtype=ctx.out_dtype, use_fast_accum=True ) - # calculate input grad and assign to results[0] - results[0] = dx.view(ctx.input_shape) + dx = dx.view(ctx.input_shape) - # calculate weight grad and assign to results[1] dw = torch._scaled_mm(yt_q, xt_q.t(), - scale_a=yt_scale, - scale_b=xt_scale, + scale_a=yt_scale.view(-1,1), + scale_b=xt_scale.view(1,-1), out_dtype=ctx.out_dtype, use_fast_accum=True ) - results[1] = dw + db = None if ctx.bias_requires_grad: - # calculate bias grad and assign to results[2] - results[2] = torch.sum(output_grad, dim=0) + db = torch.sum(output_grad, dim=0) + + return dx, dw, db, None - return tuple(results) class HadamardQuantLinear(torch.nn.Module): + """ + a naive implementation of hadamard transformation and quantization + """ def __init__( self, in_features: int, @@ -104,14 +104,12 @@ def __init__( dtype=None ): """ - a naive implementation of hadamard transformation and quantization Args: in_features: in feature number out_features: out feature number bias: whether use bias device: weight device dtype: weight dtype - impl: implementation of hadamard quantization """ super().__init__() self.in_features = in_features @@ -145,6 +143,7 @@ def _hadamard_matrix(self, size, device=None, dtype=None, norm=False): return m def forward(self, input: torch.Tensor) -> torch.Tensor: + """""" if self.training: return _HadamardQuantLinear.apply(input, self.weight, self.bias, self.hadamard_matrix) @@ -155,9 +154,11 @@ def forward(self, input: torch.Tensor) -> torch.Tensor: return output def extra_repr(self) -> str: + """""" return f"in_features={self.in_features}, 
out_features={self.out_features}, bias={self.bias is not None}" def reset_parameters(self): + """""" self.weight.data.normal_(mean=0.0, std=0.02) if self.bias is not None: self.bias.data.zero_() diff --git a/linghe/facade/loss.py b/linghe/facade/loss.py index 1fa7294..0feac8a 100644 --- a/linghe/facade/loss.py +++ b/linghe/facade/loss.py @@ -10,9 +10,7 @@ class SoftmaxCrossEntropyFunction(torch.autograd.Function): - """ - - """ + """""" @staticmethod def forward(ctx, logits, labels, inplace=False): shape = logits.shape @@ -48,9 +46,8 @@ def softmax_cross_entropy(logits: torch.Tensor, labels: torch.Tensor, inplace: b logits: logits tensor, shape [...,dim] labels: labels tensor, shape [...] inplace: update gradient in the `logits` tensor if True - Returns: - per token loss + a tensor of per token loss """ assert logits.is_contiguous() assert labels.is_contiguous() @@ -58,9 +55,7 @@ def softmax_cross_entropy(logits: torch.Tensor, labels: torch.Tensor, inplace: b class GradScalingFunction(torch.autograd.Function): - """ - - """ + """""" @staticmethod def forward(ctx, x, coef=0.2): ctx.coef = coef diff --git a/linghe/facade/norm.py b/linghe/facade/norm.py index 435942f..9dc90e5 100644 --- a/linghe/facade/norm.py +++ b/linghe/facade/norm.py @@ -10,9 +10,7 @@ class RMSNormFunction(torch.autograd.Function): - """ - - """ + """""" @staticmethod def forward(ctx, x, weight, eps=1e-6): output = triton_rms_norm_forward( @@ -56,9 +54,7 @@ def rms_norm(x: torch.Tensor, weight: torch.Tensor, eps: float = 1e-6): return RMSNormFunction.apply(x, weight, eps) class GroupNormGateFunction(torch.autograd.Function): - """ - - """ + """""" @staticmethod def forward(ctx, attn_output, gate, weight, eps=1e-6, group_size=4): output = triton_group_norm_gate_forward( diff --git a/linghe/facade/rope.py b/linghe/facade/rope.py index 3719095..f845183 100644 --- a/linghe/facade/rope.py +++ b/linghe/facade/rope.py @@ -10,9 +10,7 @@ class QkNormHalfRopeFunction(torch.autograd.Function): - """ - - """ + """""" @staticmethod def forward(ctx, qkv, q_norm_weight, k_norm_weight, freqs, H=32, h=4, eps=1e-6): @@ -71,9 +69,9 @@ def qk_norm_half_rope(qkv: torch.Tensor, eps: epsilon value for L2 normalization. 
Returns: - qo: shape [B, S, H, head_dim] - ko: shape [B, S, h, head_dim] - vo: shape [B, S, h, head_dim] + - qo: shape [B, S, H, head_dim] + - ko: shape [B, S, h, head_dim] + - vo: shape [B, S, h, head_dim] """ return QkNormHalfRopeFunction.apply(qkv, q_norm_weight, diff --git a/linghe/facade/smooth_quant_linear.py b/linghe/facade/smooth_quant_linear.py index fccbabb..dd284e2 100644 --- a/linghe/facade/smooth_quant_linear.py +++ b/linghe/facade/smooth_quant_linear.py @@ -19,19 +19,22 @@ def forward( ctx, input: torch.Tensor, weight: torch.Tensor, + bias: Optional[torch.Tensor], smooth_scale: torch.Tensor, - bias: Optional[torch.Tensor] ): ctx.input_requires_grad = input.requires_grad ctx.weight_requires_grad = weight.requires_grad ctx.bias_requires_grad = bias is not None and bias.requires_grad - ctx.out_dtype = input.dtype ctx.input_shape = input.shape + + round_scale = True + ctx.round_scale = round_scale + input = input.view(-1, input.shape[-1]) - x_q, x_scale, x_maxs = triton_smooth_quant(input, 1 / smooth_scale) - w_q, w_scale, w_maxs = triton_smooth_quant(weight, smooth_scale) + x_q, x_scale, x_maxs = triton_smooth_quant(input, 1 / smooth_scale, round_scale=round_scale) + w_q, w_scale, w_maxs = triton_smooth_quant(weight, smooth_scale, round_scale=round_scale) output = torch._scaled_mm(x_q, w_q.t(), @@ -60,26 +63,28 @@ def backward( ctx, output_grad: torch.Tensor ): + x_q, x_s, w_q, w_s, smooth_scale = ctx.saved_tensors - results = [None, None, None, None] output_grad = output_grad.view(-1, output_grad.shape[-1]) - - y_q, y_scale, y_maxs = triton_smooth_quant(output_grad, w_s) + round_scale = ctx.round_scale + y_q, y_scale, y_maxs = triton_smooth_quant(output_grad, + w_s, + reverse=True, + round_scale=round_scale) wt_q = triton_transpose_and_pad(w_q, pad=True) dx = torch._scaled_mm(y_q, - wt_q.t(), - scale_a=y_scale.view(-1, 1), - scale_b=smooth_scale.view(1, -1), - out_dtype=ctx.out_dtype, - use_fast_accum=True) + wt_q.t(), + scale_a=y_scale.view(-1, 1), + scale_b=smooth_scale.view(1, -1), + out_dtype=ctx.out_dtype, + use_fast_accum=True) - # calculate input grad and assign to results[0] - results[0] = dx.view(ctx.input_shape) - - # calculate weight grad and assign to results[1] - yt_q, yt_scale, yt_maxs = triton_transpose_smooth_quant(output_grad, x_s) + yt_q, yt_scale = triton_transpose_smooth_quant(output_grad, + x_s, + reverse=True , + round_scale=round_scale) xt_q = triton_transpose_and_pad(x_q, pad=True) dw = torch._scaled_mm(yt_q, @@ -89,16 +94,17 @@ def backward( out_dtype=ctx.out_dtype, use_fast_accum=True) - results[1] = dw - + db = None if ctx.bias_requires_grad: - # calculate bias grad and assign to results[2] - results[2] = torch.sum(output_grad, dim=0) + db = torch.sum(output_grad, dim=0) - return tuple(results) + return dx, dw, db, None -class QuantLinear(torch.nn.Module): +class SmoothQuantLinear(torch.nn.Module): + """ + a naive implementation of smooth quantization linear + """ def __init__( self, in_features: int, @@ -107,6 +113,14 @@ def __init__( device=None, dtype=None ): + """ + Args: + in_features: in feature number + out_features: out feature number + bias: whether use bias + device: weight device + dtype: weight dtype + """ super().__init__() self.in_features = in_features self.out_features = out_features @@ -120,13 +134,13 @@ def __init__( self.bias = None self.gap_step = 16 - self.decay_coef = 0.9 self.smooth_scale = None self.smooth_update_step = 0 self.reset_parameters() def forward(self, input: torch.Tensor) -> torch.Tensor: + """""" if self.training: 
if self.smooth_update_step % self.gap_step == 0: @@ -134,10 +148,10 @@ def forward(self, input: torch.Tensor) -> torch.Tensor: weight_maxs = triton_abs_max(self.weight.data) self.smooth_scale = torch.sqrt(input_maxs * weight_maxs) - output, smooth_scale = _SmoothQuantLinear.apply(input, - self.weight, - self.bias, - self.smooth_scale) + output = _SmoothQuantLinear.apply(input, + self.weight, + self.bias, + self.smooth_scale) self.smooth_update_step += 1 else: output = input @ self.weight.t() @@ -146,9 +160,11 @@ def forward(self, input: torch.Tensor) -> torch.Tensor: return output def extra_repr(self) -> str: + """""" return f"in_features={self.in_features}, out_features={self.out_features}, bias={self.bias is not None}" def reset_parameters(self): + """""" self.weight.data.normal_(mean=0.0, std=0.02) if self.bias is not None: self.bias.data.zero_() diff --git a/linghe/facade/transpose.py b/linghe/facade/transpose.py index 9d8de83..9332e18 100644 --- a/linghe/facade/transpose.py +++ b/linghe/facade/transpose.py @@ -9,9 +9,7 @@ class TransposeDim01Function(torch.autograd.Function): - """ - - """ + """""" @staticmethod def forward(ctx, x): return triton_transpose(x, dim0=0, dim1=1) diff --git a/linghe/gemm/blockwise_fp8_gemm.py b/linghe/gemm/blockwise_fp8_gemm.py index da9416a..f97a1d0 100644 --- a/linghe/gemm/blockwise_fp8_gemm.py +++ b/linghe/gemm/blockwise_fp8_gemm.py @@ -72,6 +72,7 @@ def triton_bb_fp8_gemm(a: torch.Tensor, b_s: torch.Tensor, out_dtype=torch.bfloat16, block_size=128): + """""" assert a.is_contiguous() and b.is_contiguous() assert a_s.is_contiguous() and b_s.is_contiguous() K = a.size(-1) @@ -155,6 +156,7 @@ def triton_tb_fp8_gemm(a: torch.Tensor, b_s: torch.Tensor, out_dtype=torch.bfloat16, block_size=128): + """""" assert a.is_contiguous() and b.is_contiguous() assert a_s.is_contiguous() and b_s.is_contiguous() K = a.size(-1) @@ -227,6 +229,7 @@ def triton_tt_fp8_gemm(a: torch.Tensor, b_s: torch.Tensor, out_dtype=torch.bfloat16, block_size=128): + """""" assert a.is_contiguous() and b.is_contiguous() assert a_s.is_contiguous() and b_s.is_contiguous() K = a.size(-1) diff --git a/linghe/gemm/fp32_gemm.py b/linghe/gemm/fp32_gemm.py index 8f44067..5ad778c 100644 --- a/linghe/gemm/fp32_gemm.py +++ b/linghe/gemm/fp32_gemm.py @@ -297,7 +297,7 @@ def triton_scaled_fp32_gemm(a: torch.Tensor, scale: scale for activation tensor, 1/rms Returns: - + output tensor """ assert a.is_contiguous() and b.is_contiguous() M, K = a.size() diff --git a/linghe/quant/block.py b/linghe/quant/block.py index cfb37fa..e7d093f 100644 --- a/linghe/quant/block.py +++ b/linghe/quant/block.py @@ -39,8 +39,8 @@ def triton_block_quant(x, round_scale: whether round scale to power of 2 Returns: - y: quantized tensor, float8_e4m3fn - s: quantization scale, float32 + - y: quantized tensor, float8_e4m3fn + - s: quantization scale, float32 """ M, N = x.size() y = torch.empty((M, N), dtype=torch.float8_e4m3fn, device=x.device) diff --git a/linghe/quant/channel.py b/linghe/quant/channel.py index 01a9571..462b6cb 100644 --- a/linghe/quant/channel.py +++ b/linghe/quant/channel.py @@ -199,8 +199,8 @@ def triton_transpose_row_quant(x, round_scale=False): round_scale: whether round scale to power of 2 Returns: - x_q: quantized tensor - x_scale: quantization scale + - x_q: quantized tensor + - x_scale: quantization scale """ M, N = x.shape @@ -270,8 +270,3 @@ def channel_quant_update(y, x): use_fast_accum=True) return output, y_q, x_q, y_scale, x_scale - -def fp8_channel_f_and_b(x, w, y): - channel_quant_forward(x, w) - 
channel_quant_backward(y, w) - channel_quant_update(y, x) diff --git a/linghe/quant/group.py b/linghe/quant/group.py index 9dec9b8..b46ce47 100644 --- a/linghe/quant/group.py +++ b/linghe/quant/group.py @@ -42,8 +42,8 @@ def triton_group_quant(x, round_scale: whether round scale to power of 2 Returns: - y: quantized tensor, float8_e4m3fn - s: quantization scale, float32 + - y: quantized tensor, float8_e4m3fn + - s: quantization scale, float32 """ M, N = x.shape K = 16 diff --git a/linghe/quant/hadamard.py b/linghe/quant/hadamard.py index 1bd77bc..0c59f96 100644 --- a/linghe/quant/hadamard.py +++ b/linghe/quant/hadamard.py @@ -125,10 +125,10 @@ def triton_hadamard_quant(x, hm): x: input tensor hm: hamadard matrix Returns: - x_q: rowwise quantized tensor of non-transposed x - x_scale: rowwise quantization scale of non-transposed x - xt_q: columnwise quantized tensor of transposed x - xt_scale: columnwise quantization scale of transposed x + - x_q: rowwise quantized tensor of non-transposed x + - x_scale: rowwise quantization scale of non-transposed x + - xt_q: columnwise quantized tensor of transposed x + - xt_scale: columnwise quantization scale of transposed x """ M, N = x.shape device = x.device diff --git a/linghe/quant/smooth.py b/linghe/quant/smooth.py index 4844511..93ac054 100644 --- a/linghe/quant/smooth.py +++ b/linghe/quant/smooth.py @@ -150,9 +150,7 @@ def blockwise_smooth_quant_kernel(x_ptr, q_ptr, ss_ptr, qs_ptr, max_ptr, def triton_smooth_quant(x, smooth_scale, x_q=None, x_scale=None, reverse=False, round_scale=False, calibrate=False): - """ - - """ + """""" M, N = x.shape device = x.device if x_q is None: @@ -291,9 +289,7 @@ def subrow_smooth_quant_kernel(x_ptr, q_ptr, ss_ptr, qs_ptr, def triton_subrow_smooth_quant(x, smooth_scale, x_q, x_scale, subrow_scales, offset, size, reverse=False, round_scale=False): - """ - - """ + """""" M, N = x_q.shape W = 128 if offset % N == 0: @@ -369,9 +365,7 @@ def depracated_tokenwise_smooth_quant_kernel(x_ptr, q_ptr, ss_ptr, def triton_depracated_tokenwise_smooth_quant(x, smooth_scale, x_q=None, x_scale=None, reverse=False, round_scale=False): - """ - - """ + """""" # row-wise read, row-wise write M, N = x.shape device = x.device @@ -455,9 +449,7 @@ def triton_batch_smooth_quant(x, smooth_scales, token_count_per_expert, x_q=None, x_scale=None, x_maxs=None, reverse=False, round_scale=False, calibrate=False): - """ - - """ + """""" M, N = x.shape device = x.device n_expert = token_count_per_expert.shape[0] @@ -578,9 +570,7 @@ def triton_batch_pad_transpose_smooth_quant(x, splits, x_q=None, x_scale=None, x_maxs=None, reverse=False, round_scale=False): - """ - - """ + """""" M, N = x.shape device = x.device n_expert = token_count_per_expert.shape[0] @@ -694,9 +684,7 @@ def triton_transpose_smooth_quant(x, round_scale=False): # col-wise read, row-wise write # M should be padded if M % 32 != 0 - """ - - """ + """""" M, N = x.shape device = x.device P = (M + 31) // 32 * 32 if pad else M @@ -823,9 +811,7 @@ def triton_transpose_rescale_smooth_quant(x_q, org_smooth_scale, reverse=True, pad=False, round_scale=False): - """ - - """ + """""" assert reverse M, N = x_q.shape device = x_q.device @@ -886,9 +872,7 @@ def triton_transpose_rescale_smooth_quant(x_q, org_smooth_scale, # dwT = yT @ x def triton_smooth_quant_input(x, smooth_scale, x_q=None, x_scale=None, xt_q=None, transpose=True, pad=True, round_scale=False): - """ - - """ + """""" x_q, x_scale, x_maxs = triton_smooth_quant(x, smooth_scale, x_q=x_q, x_scale=x_scale, reverse=False, 
round_scale=round_scale) @@ -912,9 +896,7 @@ def triton_smooth_quant_gradient(y, transpose=True, pad=True, round_scale=False): - """ - - """ + """""" assert reverse, ("args `smooth_scale` and/or `transpose_smooth_scale` " "must be in reciprocal format in triton_smooth_quant_grad") y_q, y_scale, _ = triton_smooth_quant(y, smooth_scale, reverse=True, @@ -937,9 +919,7 @@ def triton_smooth_quant_weight(w, quant_scale, subrow_scales, offset=0, round_scale=False): - """ - - """ + """""" assert w.ndim == 1 assert w_q.size(1) == smooth_scale.size(0) diff --git a/linghe/utils/gather.py b/linghe/utils/gather.py index abba7e4..642db26 100644 --- a/linghe/utils/gather.py +++ b/linghe/utils/gather.py @@ -843,8 +843,7 @@ def triton_smooth_permute_with_mask_map( round_scale=False ): """ - gather and optional dequant and smooth quant - + gather ( and optional dequant) and smooth quant Args: inp: [num_tokens, hidden_size], rowwise quantized tensor row_id_map: [n_experts, num_tokens], indices @@ -858,7 +857,8 @@ def triton_smooth_permute_with_mask_map( round_scale: Returns: - + - output: output tensor + - permuted_scale: permuted scale if scale is not None """ assert row_id_map.shape[1] == num_experts output = torch.empty((num_out_tokens, hidden_size), diff --git a/linghe/utils/norm.py b/linghe/utils/norm.py index c55745c..6207cc6 100644 --- a/linghe/utils/norm.py +++ b/linghe/utils/norm.py @@ -288,11 +288,12 @@ def triton_rms_norm_and_block_quant_forward(x: torch.Tensor, 1: only output transposed tensor 2: return both Returns: - out: quantization data - scale: quantization scale - rms: Reciprocal of the root mean square of the input calculated over the last dimension. - transpose_output: quantization data of transposed gradient - transpose_scale: quantization scale of transposed gradient + - out: quantization data. + - scale: quantization scale. + - rms: Reciprocal of the root mean square of the + input calculated over the last dimension. + - transpose_output: quantization data of transposed gradient. + - transpose_scale: quantization scale of transposed gradient. 
""" # row-wise read, row-wise write M, N = x.shape @@ -581,9 +582,7 @@ def triton_rms_norm_and_smooth_quant_forward(x, weight, smooth_scale=None, calibrate=False, output_rms=False, round_scale=False): - """ - - """ + """""" M, N = x.shape assert N <= 8192 and 8192 % N == 0 device = x.device diff --git a/linghe/utils/rearange.py b/linghe/utils/rearange.py index 58e5d6c..868814c 100644 --- a/linghe/utils/rearange.py +++ b/linghe/utils/rearange.py @@ -43,8 +43,8 @@ def triton_split_and_cat(x, counts, indices, scales=None): scales: [bs] Returns: - y: output tensor - output_scales: output scales if scales is not None + - y: output tensor + - output_scales: output scales if scales is not None """ M, N = x.shape n_split = counts.shape[0] diff --git a/linghe/utils/rope.py b/linghe/utils/rope.py index 85f5696..882b014 100644 --- a/linghe/utils/rope.py +++ b/linghe/utils/rope.py @@ -89,8 +89,8 @@ def triton_half_rope_forward(q, k, freqs): freqs: rope freqs Returns: - qo: - ko: + - qo: query output + - ko: key output """ L, B, H, D = q.shape h = k.shape[2] @@ -340,9 +340,9 @@ def triton_qk_norm_and_half_rope_forward(qkv, q_norm_weight, k_norm_weight, transpose: whether qkv is tranposed, i.e., [S, B, dim], only support transpose format currently Returns: - qo: shape [B, S, H, head_dim] - ko: shape [B, S, h, head_dim] - vo: shape [B, S, h, head_dim] + - qo: shape [B, S, H, head_dim] + - ko: shape [B, S, h, head_dim] + - vo: shape [B, S, h, head_dim] """ assert transpose @@ -560,9 +560,9 @@ def triton_qk_norm_and_half_rope_backward(gq, gk, gv, qkv, q_norm_weight, interleave: Returns: - dqkv: gradient of qkv - dqw: gradient of q_norm_weight - dkw: gradient of k_norm_weight + - dqkv: gradient of qkv + - dqw: gradient of q_norm_weight + - dkw: gradient of k_norm_weight """ assert transpose B, L, H, D = gq.shape diff --git a/linghe/utils/scatter.py b/linghe/utils/scatter.py index bb39945..2a4e465 100644 --- a/linghe/utils/scatter.py +++ b/linghe/utils/scatter.py @@ -104,7 +104,7 @@ def triton_scatter_add(x, outputs, indices): indices: indices Returns: - outputs + output tensor """ M, N = x.shape @@ -186,8 +186,8 @@ def triton_unpermute_with_mask_map( probs: [num_out_tokens] Returns: - output: [num_tokens, hidden_size] - restore_probs: [num_tokens, num_experts] + - output: [num_tokens, hidden_size] + - restore_probs: [num_tokens, num_experts] """ hidden_size = grad.shape[1] num_tokens, num_experts = row_id_map.shape # not transposed diff --git a/linghe/utils/silu.py b/linghe/utils/silu.py index bfcf910..90545b3 100644 --- a/linghe/utils/silu.py +++ b/linghe/utils/silu.py @@ -122,8 +122,8 @@ def triton_weighted_silu_backward(g: torch.Tensor, weight: weight tensor Returns: - dx: gradient of x - dw: gradient of weight + - dx: gradient of x + - dw: gradient of weight """ # row-wise read, row-wise write M, N = x.shape @@ -228,10 +228,10 @@ def triton_silu_and_block_quant_forward(x, 2: output both Returns: - out: quantized tensor - scale: quantization scale - transpose_output: quantized tensor of transposed output - transpose_scale: quantization scale of transposed output + - out: quantized tensor + - scale: quantization scale + - transpose_output: quantized tensor of transposed output + - transpose_scale: quantization scale of transposed output """ M, N = x.shape n = N // 2 @@ -349,10 +349,10 @@ def triton_silu_and_block_quant_backward(g, x, round_scale: whether round to power of 2 Returns: - dx: quantized non-transposed gradient - dx_scale: scales of quantization non-transposed gradient - transpose_dx: quantized 
transposed gradient - transpose_dx_scale: scales of quantization transposed gradient + - dx: quantized non-transposed gradient + - dx_scale: scales of quantization non-transposed gradient + - transpose_dx: quantized transposed gradient + - transpose_dx_scale: scales of quantization transposed gradient """ M, N = x.shape n = N // 2 @@ -480,10 +480,10 @@ def triton_batch_weighted_silu_and_block_quant_forward(x, 2: output both Returns: - out: quantized tensor - scale: quantization scale - transpose_output: quantized tensor of transposed output - transpose_scale: quantization scale of transposed output + - out: quantized tensor + - scale: quantization scale + - transpose_output: quantized tensor of transposed output + - transpose_scale: quantization scale of transposed output """ M, N = x.shape n = N // 2 @@ -642,11 +642,11 @@ def triton_batch_weighted_silu_and_block_quant_backward(g, x, weight, splits: python int list of token count per expert round_scale: whether round scale to power of 2 Returns: - dx: quantized non-transposed gradient - dx_scale: scales of quantization non-transposed gradient - dw: gradient of weight - transpose_dx: quantized transposed gradient - transpose_dx_scale: scales of quantization transposed gradient + - dx: quantized non-transposed gradient + - dx_scale: scales of quantization non-transposed gradient + - dw: gradient of weight + - transpose_dx: quantized transposed gradient + - transpose_dx_scale: scales of quantization transposed gradient """ # row-wise read, row-wise write M, N = x.shape @@ -793,9 +793,7 @@ def compatible_silu_and_smooth_quant_forward_kernel(x_ptr, smooth_scale_ptr, out def triton_silu_and_smooth_quant_forward(x, smooth_scale=None, out=None, scale=None, maxs=None, round_scale=False, calibrate=False): - """ - - """ + """""" M, N = x.shape n = N // 2 device = x.device @@ -995,9 +993,7 @@ def triton_silu_and_smooth_quant_backward(g, x, transpose_smooth_scale=None, reverse=True, round_scale=False): - """ - - """ + """""" assert round_scale M, N = x.shape n = N // 2 @@ -1112,9 +1108,7 @@ def triton_batch_weighted_silu_and_smooth_quant_forward(x, round_scale=False, reverse=False, calibrate=False): - """ - - """ + """""" M, N = x.shape n = N // 2 n_experts = counts.shape[0] @@ -1365,9 +1359,7 @@ def triton_batch_weighted_silu_and_smooth_quant_backward(g, x, weight, splits=None, reverse=True, round_scale=False): - """ - - """ + """""" assert round_scale M, N = x.shape n = N // 2 diff --git a/tests/test_hadamard_quant.py b/tests/test_hadamard_quant.py index c4c6a57..f508e2f 100644 --- a/tests/test_hadamard_quant.py +++ b/tests/test_hadamard_quant.py @@ -12,7 +12,7 @@ torch_hadamard_transform, torch_row_quant, ) - +from linghe.facade.hadamard_quant_linear import HadamardQuantLinear @@ -73,6 +73,28 @@ def test_hadamard_quant(M=8192, N=1024, K=2048, B=64, bench=False): output_check(dyst, dyt_scale, 'dyt.scale') +def test_hadamard_quant_linear(M=8192, N=1024, K=2048, B=64): + + dtype = torch.bfloat16 + device = 'cuda:0' + linear = HadamardQuantLinear(K, N, bias=False, dtype=dtype, device=device) + x = torch.randn((M, K), dtype=dtype, device=device).requires_grad_() + w = torch.randn((N, K), dtype=dtype, device=device) + dy = torch.randn((M, N), dtype=dtype, device=device) + linear.weight.data.copy_(w) + + y_ref = x@w.t() + y = linear(x) + output_check(y_ref, y, mode='y') + + dx_ref = dy@w + dw_ref = dy.t()@x + y.backward(dy) + dw = linear.weight.grad + dx = x.grad + output_check(dx_ref, dx, mode='dx') + output_check(dw_ref, dw, mode='dw') if __name__ == 
'__main__': - test_hadamard_quant(M=8192, N=1024, K=2048, B=64, bench=False) \ No newline at end of file + test_hadamard_quant(M=8192, N=1024, K=2048, B=64, bench=False) + test_hadamard_quant_linear(M=8192, N=1024, K=2048, B=64) \ No newline at end of file diff --git a/tests/test_smooth_quant.py b/tests/test_smooth_quant.py index 4ddc166..b0011d9 100644 --- a/tests/test_smooth_quant.py +++ b/tests/test_smooth_quant.py @@ -15,7 +15,7 @@ torch_make_indices, torch_smooth_quant, round_up) - +from linghe.facade.smooth_quant_linear import SmoothQuantLinear def torch_split_smooth_quant(x_split, smooth_scales, round_scale=False): x_qs = [] @@ -294,6 +294,32 @@ def test_triton_batch_smooth_quant(M=4096, N=4096, n_experts=32, topk=8, n_repeat=n_repeat, ref_time=ref_time) + + +def test_smooth_quant_linear(M=8192, N=1024, K=2048): + + dtype = torch.bfloat16 + device = 'cuda:0' + linear = SmoothQuantLinear(K, N, bias=False, dtype=dtype, device=device) + x = (10*torch.randn((M, K), dtype=dtype, device=device)).requires_grad_() + w = 0.1*torch.randn((N, K), dtype=dtype, device=device) + dy = 1e-6*torch.randn((M, N), dtype=dtype, device=device) + linear.weight.data.copy_(w) + + y_ref = x@w.t() + y = linear(x) + output_check(y_ref, y, mode='y') + + dx_ref = dy@w + dw_ref = dy.t()@x + y.backward(dy) + dw = linear.weight.grad + dx = x.grad + output_check(dx_ref, dx, mode='dx') + output_check(dw_ref, dw, mode='dw') + + + if __name__ == '__main__': test_triton_smooth_quant(M=16384, N=2048, bench=False) test_triton_smooth_quant(M=8192, N=4096, bench=False) @@ -326,3 +352,4 @@ def test_triton_batch_smooth_quant(M=4096, N=4096, n_experts=32, topk=8, test_triton_batch_smooth_quant(M=4096, N=4096, n_experts=32, topk=8, round_scale=False) + test_smooth_quant_linear(M=8192, N=1024, K=2048) \ No newline at end of file