@@ -1254,3 +1254,57 @@ func.func @missing_user_indexing_maps() {
// CHECK-DAG: %[[STORE_BINDING:.+]] = hal.interface.binding.subspan {{.+}} binding(1)
// CHECK-DAG: %[[LOAD:.+]] = flow.dispatch.tensor.load %[[LOAD_BINDING]]{{.+}} -> tensor<255x513xf32>
// CHECK-DAG: flow.dispatch.tensor.store %[[LOAD]], %[[STORE_BINDING]]
+
+ // -----
+
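+ // Dequantization on encoded tensors: based on the CHECK lines below, the
+ // element-wise generic op and its broadcasted scale/zero-point operands
+ // should be materialized into the packed layout chosen for the matmul LHS.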
+ #pipeline_layout = #hal.pipeline.layout<bindings = [
+   #hal.pipeline.binding<storage_buffer>,
+   #hal.pipeline.binding<storage_buffer>,
+   #hal.pipeline.binding<storage_buffer>,
+   #hal.pipeline.binding<storage_buffer>
+ ]>
+ #encoding = #iree_encoding.encoding<operand_index = 0 : index, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>]>
+ #encoding_bcast = #iree_encoding.encoding<operand_index = 0 : index, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [[affine_map<(d0, d1, d2, d3) -> (d0, d2, d3)>, affine_map<(d0, d1, d2) -> (d0, d2)>], affine_map<(d0, d1, d2, d3) -> (d0, d3, d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>]>
+ func.func @dequantization() {
+   %c0 = arith.constant 0 : index
+   %cst = arith.constant 0.000000e+00 : f32
+   %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2x128x64xi8, #encoding>>
+   %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2x64xf32, #encoding_bcast>>
+   %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2x64xf32, #encoding_bcast>>
+   %6 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x128x64xf32, #encoding>>
+   %7 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0], sizes = [2, 128, 64], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<2x128x64xi8, #encoding>> -> tensor<2x128x64xi8, #encoding>
+   %8 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [2, 64], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x64xf32, #encoding_bcast>> -> tensor<2x64xf32, #encoding_bcast>
+   %9 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [2, 64], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x64xf32, #encoding_bcast>> -> tensor<2x64xf32, #encoding_bcast>
+   %13 = tensor.empty() : tensor<2x128x64xf32, #encoding>
+   %14 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%7, %8, %9 : tensor<2x128x64xi8, #encoding>, tensor<2x64xf32, #encoding_bcast>, tensor<2x64xf32, #encoding_bcast>) outs(%13 : tensor<2x128x64xf32, #encoding>) {
+   ^bb0(%in: i8, %in_0: f32, %in_1: f32, %out: f32):
+     %21 = arith.extui %in : i8 to i32
+     %22 = arith.uitofp %21 : i32 to f32
+     %23 = arith.subf %22, %in_1 : f32
+     %24 = arith.mulf %23, %in_0 : f32
+     linalg.yield %24 : f32
+   } -> tensor<2x128x64xf32, #encoding>
+   flow.dispatch.tensor.store %14, %6, offsets = [0, 0, 0], sizes = [2, 128, 64], strides = [1, 1, 1] : tensor<2x128x64xf32, #encoding> -> !flow.dispatch.tensor<writeonly:tensor<2x128x64xf32, #encoding>>
+   return
+ }
+ // CHECK-DAG: #[[$MAP:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d2, d3, d4, d5, d6, d7)>
+ // CHECK-DAG: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d2, d4, d7)>
+ // CHECK-LABEL: func.func @dequantization()
+ // CHECK-DAG: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan {{.*}} binding(0) {{.*}} : !flow.dispatch.tensor<readonly:tensor<2x1x4x8x4x4x4x4xi8>>
+ // CHECK-DAG: %[[LHS_SCALES_BINDING:.+]] = hal.interface.binding.subspan {{.*}} binding(1) {{.*}} : !flow.dispatch.tensor<readonly:tensor<2x4x4x4xf32>>
+ // CHECK-DAG: %[[LHS_ZPS_BINDING:.+]] = hal.interface.binding.subspan {{.*}} binding(2) {{.*}} : !flow.dispatch.tensor<readonly:tensor<2x4x4x4xf32>>
+ // CHECK-DAG: %[[RESULT_BINDING:.+]] = hal.interface.binding.subspan {{.*}} binding(3) {{.*}} : !flow.dispatch.tensor<writeonly:tensor<2x1x4x8x4x4x4x4xf32>>
+ // CHECK-DAG: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]], offsets = [0, 0, 0, 0, 0, 0, 0, 0], sizes = [2, 1, 4, 8, 4, 4, 4, 4], strides = [1, 1, 1, 1, 1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<2x1x4x8x4x4x4x4xi8>> -> tensor<2x1x4x8x4x4x4x4xi8>
+ // CHECK-DAG: %[[LHS_SCALES:.+]] = flow.dispatch.tensor.load %[[LHS_SCALES_BINDING]], offsets = [0, 0, 0, 0], sizes = [2, 4, 4, 4], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<2x4x4x4xf32>> -> tensor<2x4x4x4xf32>
+ // CHECK-DAG: %[[LHS_ZPS:.+]] = flow.dispatch.tensor.load %[[LHS_ZPS_BINDING]], offsets = [0, 0, 0, 0], sizes = [2, 4, 4, 4], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<2x4x4x4xf32>> -> tensor<2x4x4x4xf32>
+ // CHECK-DAG: %[[EMPTY_LHS:.+]] = tensor.empty() : tensor<2x1x4x8x4x4x4x4xf32>
+ // CHECK-DAG: %[[LHS_DEQUANT:.+]] = linalg.generic
+ // CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP1]], #[[$MAP]]]
+ // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel"]
+ // CHECK-SAME: ins(%[[LHS]], %[[LHS_SCALES]], %[[LHS_ZPS]] : tensor<2x1x4x8x4x4x4x4xi8>, tensor<2x4x4x4xf32>, tensor<2x4x4x4xf32>)
+ // CHECK-SAME: outs(%[[EMPTY_LHS]] : tensor<2x1x4x8x4x4x4x4xf32>)
+ // CHECK: arith.extui
+ // CHECK: arith.uitofp
+ // CHECK: arith.subf
+ // CHECK: arith.mulf
+ // CHECK: flow.dispatch.tensor.store %[[LHS_DEQUANT]], %[[RESULT_BINDING]], offsets = [0, 0, 0, 0, 0, 0, 0, 0], sizes = [2, 1, 4, 8, 4, 4, 4, 4], strides = [1, 1, 1, 1, 1, 1, 1, 1] : tensor<2x1x4x8x4x4x4x4xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1x4x8x4x4x4x4xf32>>