Remove Marlin kernel warning about reducing max_m_blocks (vllm-project#4918)

alexm-redhat · web-flow · commit da5a0b539d6a · 2024-05-20T14:55:34.000Z
diff --git a/csrc/quantization/gptq_marlin/gptq_marlin.cu b/csrc/quantization/gptq_marlin/gptq_marlin.cu
@@ -1519,10 +1519,6 @@ exec_config_t determine_thread_config(int prob_m, int prob_n, int prob_k,
       }
     }
 
-    printf("WARNING: Marlin kernel is reducing max_m_blocks due to small SM "
-           "GPU cache. This may "
-           "hurt performance. Consider upgrading your GPU.\n");
-
     max_m_blocks--; // Process less M blocks per invocation to reduce cache
                     // usage
   }

Original file line number	Diff line number	Diff line change
`@@ -1519,10 +1519,6 @@ exec_config_t determine_thread_config(int prob_m, int prob_n, int prob_k,`
`1519`	`1519`	`}`
`1520`	`1520`	`}`
`1521`	`1521`
`1522`		`- printf("WARNING: Marlin kernel is reducing max_m_blocks due to small SM "`
`1523`		`- "GPU cache. This may "`
`1524`		`- "hurt performance. Consider upgrading your GPU.\n");`
`1525`		`-`
`1526`	`1522`	`max_m_blocks--; // Process less M blocks per invocation to reduce cache`
`1527`	`1523`	`// usage`
`1528`	`1524`	`}`