Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 21 additions & 3 deletions src/plugins/intel_gpu/src/graph/layout_optimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1028,12 +1028,30 @@ void layout_optimizer::set_onednn_dyn_conv_preferred_format(convolution_node& no
node.set_preferred_input_fmt(0, get_fsv16_format(rank));
node.set_preferred_output_fmt(0, get_fsv16_format(rank));

// Override with default format for small channels (≤ 4)
if (input_channels > 0 && input_channels <= 4) {
// Override input for small channels (≤ 16)
// The fsv16 format uses 16-element blocks, so channel counts that do not fill a block waste the rest as padding,
// e.g. 8 channels use only 8 of the 16 elements per block (50% waste), so the planar format is more efficient.
if (input_channels > 0 && input_channels <= 16) {
node.set_preferred_input_fmt(0, format::get_default_format(rank));
}

if (output_channels > 0 && output_channels <= 4) {
// Override output for small channels (≤ 16)
// same as input - avoid fsv16 block padding overhead for small channel counts
if (output_channels > 0 && output_channels <= 16) {
node.set_preferred_output_fmt(0, format::get_default_format(rank));
}

// Override output for channel expansion operations (small input → large output)
// When expanding from a small input channel count (≤ 16) to a large output channel count (≥ 32),
// a planar output format enables oneDNN to select an optimized JIT kernel instead of the reference kernel.
// Thresholds explained:
// - input ≤ 16: matches fsv16 block size, input side uses planar format (set above)
// - output ≥ 32: two or more fsv16 blocks (32 / 16 = 2), where the blocked-write overhead exceeds
//   the sequential-write benefits. The planar format provides better cache locality
//   and memory access patterns when generating a large number of channels,
//   e.g. 3ch → 1024ch would create 64 fsv16 blocks with scattered writes,
//   but the planar format allows efficient sequential writes.
if (input_channels > 0 && input_channels <= 16 && output_channels >= 32) {
node.set_preferred_output_fmt(0, format::get_default_format(rank));
}
}
Expand Down
Loading