diff --git a/bitsandbytes/functional.py b/bitsandbytes/functional.py
index 1f624a7a8..11db74859 100644
--- a/bitsandbytes/functional.py
+++ b/bitsandbytes/functional.py
@@ -1944,7 +1944,10 @@ def igemmlt(A, B, SA, SB, out=None, Sout=None, dtype=torch.int32):
                 ptr, m, n, k, ptrA, ptrB, ptrC, ptrRowScale, lda, ldb, ldc
             )
 
-    if has_error == 1:
+    if has_error == 100:  # `ERR_NOT_IMPLEMENTED` is defined as 100 in `ops.cu`
+        raise NotImplementedError("igemmlt not available (probably built with NO_CUBLASLT)")
+
+    if has_error:
         print(f'A: {shapeA}, B: {shapeB}, C: {Sout[0]}; (lda, ldb, ldc): {(lda, ldb, ldc)}; (m, n, k): {(m, n, k)}')
         raise Exception('cublasLt ran into an error!')
 
diff --git a/csrc/ops.cu b/csrc/ops.cu
index 97761216c..796211fed 100644
--- a/csrc/ops.cu
+++ b/csrc/ops.cu
@@ -11,6 +11,8 @@
 #include
 #include
 
+#define ERR_NOT_IMPLEMENTED 100
+
 using namespace BinSearch;
 using std::cout;
 using std::endl;
@@ -421,14 +423,7 @@ template void transform(cublasLtHandle_t ltHandl
 template int igemmlt(cublasLtHandle_t ltHandle, int m, int n, int k, const int8_t *A, const int8_t *B, void *C, float *row_scale, int lda, int ldb, int ldc)
 {
 #ifdef NO_CUBLASLT
-    cout << "" << endl;
-    cout << "=============================================" << endl;
-    cout << "ERROR: Your GPU does not support Int8 Matmul!" << endl;
-    cout << "=============================================" << endl;
-    cout << "" << endl;
-    assert(false);
-
-    return 0;
+    return ERR_NOT_IMPLEMENTED;
 #else
     int has_error = 0;
     cublasLtMatmulDesc_t matmulDesc = NULL;
@@ -484,7 +479,7 @@ template int igemmlt(cublasLtHandle
       printf("error detected");
 
     return has_error;
-#endif
+#endif // NO_CUBLASLT
 }
 
 int fill_up_to_nearest_multiple(int value, int multiple)
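
Net effect of this patch: when the library is built with NO_CUBLASLT (GPUs without int8 tensor core support), the CUDA side no longer prints a banner and kills the process via assert(false); it returns the new ERR_NOT_IMPLEMENTED code (100), which the Python wrapper turns into a catchable NotImplementedError. A minimal sketch of how a caller might now handle that path is below; the shapes, the 'col_turing' layout, and the fp16 fallback are illustrative assumptions, not part of this diff:

    import torch
    import bitsandbytes.functional as F

    # Illustrative int8 operands; sizes are arbitrary.
    A = torch.randint(-128, 128, (32, 64), dtype=torch.int8, device="cuda")
    B = torch.randint(-128, 128, (48, 64), dtype=torch.int8, device="cuda")

    # igemmlt expects tiled layouts; 'col_turing' is an assumption here
    # (Ampere-class GPUs would use 'col_ampere').
    CA, SA = F.transform(A, "col32")
    CB, SB = F.transform(B, "col_turing")

    try:
        C, SC = F.igemmlt(CA, CB, SA, SB)
    except NotImplementedError:
        # Build compiled with NO_CUBLASLT: before this patch the process died
        # inside the CUDA library; now the caller can fall back, e.g. to fp16.
        C = A.half() @ B.half().t()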