diff --git a/src/zsyrk_LN.jdf b/src/zsyrk_LN.jdf index 25e95711..d6ad30cc 100644 --- a/src/zsyrk_LN.jdf +++ b/src/zsyrk_LN.jdf @@ -21,6 +21,9 @@ descA [type = "const parsec_tiled_matrix_t*"] beta [type = "dplasma_complex64_t"] descC [type = "parsec_tiled_matrix_t*"] +/* Look-ahead depth along k: CTL flows make the in_data tasks of step k+lookX wait for the compute tasks of step k */ +lookX [type = "int" hidden=on default="dplasma_aux_getGEMMLookahead(descC)"] + zsyrk(n, k) [ flops = inline_c %{ return FLOPS_ZSYRK(CLEAN_NB(descC, n), CLEAN_NB(descA, k)); %} ] /* Execution Space */ @@ -36,6 +39,8 @@ zsyrk(n, k) [ flops = inline_c %{ return FLOPS_ZSYRK(CLEAN_NB(descC, n), CLEAN_N -> ((descA->nt>=(k+2))) ? C zsyrk(n, k+1) -> ((descA->nt==(k+1))) ? descC(n,n) + CTL ctlx -> (k < (descA->nt-lookX)) ? ctlx zsyrk_in_data_A0(n, k+lookX) + BODY { int tempnn = (n==(descC->nt-1)) ? (descC->n-(n*descC->nb)) : descC->nb; @@ -64,6 +69,7 @@ zsyrk_in_data_A0(n, k) [profile = off] READ A <- descA(n,k) -> A zsyrk(n, k) + CTL ctlx <- (k >= lookX) ? ctlx zsyrk(n, k-lookX) BODY { @@ -87,6 +93,9 @@ zgemm(n, m, k) [ flops = inline_c %{ return FLOPS_ZGEMM(CLEAN_MB(descC, m), CLEA -> ((descA->nt==(k+1))) ? descC(m,n) -> ((descA->nt>=(k+2))) ? C zgemm(n, m, k+1) + CTL ctla -> (k < (descA->nt-lookX)) ? ctla zgemm_in_data_A0(m, k+lookX) + CTL ctlb -> (k < (descA->nt-lookX)) ? ctlb zgemm_in_data_A1(n, k+lookX) + BODY { int tempmm = ((m)==(descC->mt-1)) ? (descC->m-(m*descC->mb)) : descC->mb; @@ -118,6 +127,7 @@ zgemm_in_data_A1(n, k) [profile = off] READ B <- descA(n,k) -> B zgemm(n, (n+1)..(descC->mt-1), k) + CTL ctlb <- (k >= lookX) ? ctlb zgemm(n, (n+1) .. (descC->mt-1), k-lookX) BODY { @@ -135,6 +145,7 @@ zgemm_in_data_A0(m, k) [profile = off] READ A <- descA(m,k) -> A zgemm(0..(descC->mt-2), m, k) + CTL ctla <- (k >= lookX) ? ctla zgemm(0 .. 
(m-1), m, k-lookX) BODY { diff --git a/src/zsyrk_LT.jdf b/src/zsyrk_LT.jdf index e11e081b..9a494882 100644 --- a/src/zsyrk_LT.jdf +++ b/src/zsyrk_LT.jdf @@ -21,6 +21,9 @@ descA [type = "const parsec_tiled_matrix_t*"] beta [type = "dplasma_complex64_t"] descC [type = "parsec_tiled_matrix_t*"] +/* Look-ahead depth along k: CTL flows make the in_data tasks of step k+lookX wait for the compute tasks of step k; k indexes the rows of A here (descA(k,.)), so the CTL guards are bounded by descA->mt */ +lookX [type = "int" hidden=on default="dplasma_aux_getGEMMLookahead(descC)"] + zsyrk(n, k) [ flops = inline_c %{ return FLOPS_ZSYRK(CLEAN_NB(descC, n), CLEAN_MB(descA, k)); %} ] /* Execution Space */ @@ -36,6 +39,8 @@ zsyrk(n, k) [ flops = inline_c %{ return FLOPS_ZSYRK(CLEAN_NB(descC, n), CLEAN_M -> ((descA->mt>=(2+k))) ? C zsyrk(n, k+1) -> ((descA->mt==(1+k))) ? descC(n,n) + CTL ctlx -> (k < (descA->mt-lookX)) ? ctlx zsyrk_in_data_A0(n, k+lookX) + BODY { int tempnn = (n==(descC->nt-1)) ? (descC->n-(n*descC->nb)) : descC->nb; @@ -64,6 +69,7 @@ zsyrk_in_data_A0(n, k) [profile = off] READ A <- descA(k,n) -> A zsyrk(n, k) + CTL ctlx <- (k >= lookX) ? ctlx zsyrk(n, k-lookX) BODY { @@ -87,6 +93,9 @@ zgemm(n, m, k) [ flops = inline_c %{ return FLOPS_ZGEMM(CLEAN_MB(descC, m), CLEA -> ((descA->mt==(k+1))) ? descC(m,n) -> ((descA->mt>=(2+k))) ? C zgemm(n, m, k+1) + CTL ctla -> (k < (descA->mt-lookX)) ? ctla zgemm_in_data_A0(m, k+lookX) + CTL ctlb -> (k < (descA->mt-lookX)) ? ctlb zgemm_in_data_A1(n, k+lookX) + BODY { int tempmm = ((m)==(descC->mt-1)) ? (descC->m-(m*descC->mb)) : descC->mb; @@ -117,6 +126,7 @@ zgemm_in_data_A1(n, k) [profile = off] READ B <- descA(k,n) -> B zgemm(n, (n+1)..(descC->mt-1), k) + CTL ctlb <- (k >= lookX) ? ctlb zgemm(n, (n+1) .. (descC->mt-1), k-lookX) BODY { @@ -134,6 +144,7 @@ zgemm_in_data_A0(m, k) [profile = off] READ A <- descA(k,m) -> A zgemm(0..(descC->mt-2), m, k) + CTL ctla <- (k >= lookX) ? ctla zgemm(0 .. 
(m-1), m, k-lookX) BODY { diff --git a/src/zsyrk_UN.jdf b/src/zsyrk_UN.jdf index c1092341..8fff8342 100644 --- a/src/zsyrk_UN.jdf +++ b/src/zsyrk_UN.jdf @@ -21,6 +21,9 @@ descA [type = "const parsec_tiled_matrix_t*"] beta [type = "dplasma_complex64_t"] descC [type = "parsec_tiled_matrix_t*"] +/* Look-ahead depth along k: CTL flows make the in_data tasks of step k+lookX wait for the compute tasks of step k */ +lookX [type = "int" hidden=on default="dplasma_aux_getGEMMLookahead(descC)"] + zsyrk(n, k) [ flops = inline_c %{ return FLOPS_ZSYRK(CLEAN_NB(descC, n), CLEAN_NB(descA, k)); %} ] /* Execution Space */ @@ -36,6 +39,8 @@ zsyrk(n, k) [ flops = inline_c %{ return FLOPS_ZSYRK(CLEAN_NB(descC, n), CLEAN_N -> ((descA->nt>=(2+k))) ? C zsyrk(n, k+1) -> ((descA->nt==(k+1))) ? descC(n,n) + CTL ctlx -> (k < (descA->nt-lookX)) ? ctlx zsyrk_in_data_A0(n, k+lookX) + BODY { int tempnn = (n==(descC->nt-1)) ? (descC->n-(n*descC->nb)) : descC->nb; @@ -63,6 +68,7 @@ zsyrk_in_data_A0(n, k) [profile = off] READ A <- descA(n,k) -> A zsyrk(n, k) + CTL ctlx <- (k >= lookX) ? ctlx zsyrk(n, k-lookX) BODY { @@ -86,6 +92,9 @@ zgemm(n, m, k) [ flops = inline_c %{ return FLOPS_ZGEMM(CLEAN_NB(descC, n), CLEA -> ((descA->nt>=(k+2))) ? C zgemm(n, m, k+1) -> ((descA->nt==(k+1))) ? descC(n,m) + CTL ctla -> (k < (descA->nt-lookX)) ? ctla zgemm_in_data_A1(m, k+lookX) + CTL ctlb -> (k < (descA->nt-lookX)) ? ctlb zgemm_in_data_A0(n, k+lookX) + BODY { int tempnn = (n==(descC->nt-1)) ? (descC->n-(n*descC->nb)) : descC->nb; @@ -117,6 +126,7 @@ zgemm_in_data_A1(m, k) [profile = off] READ B <- descA(m,k) -> B zgemm(0..(descC->mt-2), m, k) + CTL ctla <- (k >= lookX) ? ctla zgemm(0 .. (m-1), m, k-lookX) BODY { @@ -134,6 +144,7 @@ zgemm_in_data_A0(n, k) [profile = off] READ A <- descA(n,k) -> A zgemm(n, (n+1)..(descC->mt-1), k) + CTL ctlb <- (k >= lookX) ? ctlb zgemm(n, (n+1) .. 
(descC->mt-1), k-lookX) BODY { diff --git a/src/zsyrk_UT.jdf b/src/zsyrk_UT.jdf index 024c3242..eb113dee 100644 --- a/src/zsyrk_UT.jdf +++ b/src/zsyrk_UT.jdf @@ -21,6 +21,9 @@ descA [type = "const parsec_tiled_matrix_t*"] beta [type = "dplasma_complex64_t"] descC [type = "parsec_tiled_matrix_t*"] +/* Look-ahead depth along k: CTL flows make the in_data tasks of step k+lookX wait for the compute tasks of step k; k indexes the rows of A here (descA(k,.)), so the CTL guards are bounded by descA->mt */ +lookX [type = "int" hidden=on default="dplasma_aux_getGEMMLookahead(descC)"] + zsyrk(n, k) [ flops = inline_c %{ return FLOPS_ZSYRK(CLEAN_NB(descC, n), CLEAN_MB(descA, k)); %} ] /* Execution Space */ @@ -36,6 +39,8 @@ zsyrk(n, k) [ flops = inline_c %{ return FLOPS_ZSYRK(CLEAN_NB(descC, n), CLEAN_M -> ((descA->mt-1) > k) ? C zsyrk(n, k+1) -> ((descA->mt-1) == k) ? descC(n,n) + CTL ctlx -> (k < (descA->mt-lookX)) ? ctlx zsyrk_in_data_A0(n, k+lookX) + BODY { int tempnn = (n==(descC->nt-1)) ? (descC->n-(n*descC->nb)) : descC->nb; @@ -63,6 +68,7 @@ zsyrk_in_data_A0(n, k) [profile = off] READ A <- descA(k,n) -> A zsyrk(n, k) + CTL ctlx <- (k >= lookX) ? ctlx zsyrk(n, k-lookX) BODY { @@ -86,6 +92,9 @@ zgemm(n, m, k) [ flops = inline_c %{ return FLOPS_ZGEMM(CLEAN_NB(descC, n), CLEA -> ((descA->mt==(k+1))) ? descC(n,m) -> ((descA->mt>=(2+k))) ? C zgemm(n, m, k+1) + CTL ctla -> (k < (descA->mt-lookX)) ? ctla zgemm_in_data_A1(m, k+lookX) + CTL ctlb -> (k < (descA->mt-lookX)) ? ctlb zgemm_in_data_A0(n, k+lookX) + BODY { int tempnn = (n==(descC->nt-1)) ? (descC->n-(n*descC->nb)) : descC->nb; @@ -115,6 +124,7 @@ zgemm_in_data_A1(m, k) [profile = off] READ B <- descA(k,m) -> B zgemm(0..(descC->mt-2), m, k) + CTL ctla <- (k >= lookX) ? ctla zgemm(0 .. (m-1), m, k-lookX) BODY { @@ -132,6 +142,7 @@ zgemm_in_data_A0(n, k) [profile = off] READ A <- descA(k,n) -> A zgemm(n, (n+1)..(descC->mt-1), k) + CTL ctlb <- (k >= lookX) ? ctlb zgemm(n, (n+1) .. (descC->mt-1), k-lookX) BODY {