Skip to content

Commit da427df

Browse files
authored
Merge pull request #319 from CESNET/dpdk-rss-fix
dpdk: fix RSS for X710 (i40e)
2 parents 4546ff6 + 5d7e650 commit da427df

File tree

8 files changed

+83
-29
lines changed

8 files changed

+83
-29
lines changed

init/config2args.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,9 @@ def process_input_dpdk_plugin(settings):
157157
mtu = settings.get("mtu", 1518)
158158
if mtu is not None:
159159
primary_param += f"mtu={mtu};"
160+
rss_offload = settings.get("rss_offload", None)
161+
if rss_offload is not None:
162+
primary_param += f"rss={rss_offload};"
160163
primary_param += f"eal={eal}\""
161164

162165
params = []

init/link0.conf.example

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,11 @@ input_plugin:
3535
eal_opts: null # EAL options (null = default options)
3636
mtu: null # Maximum Transmission Unit (defaults to RTE_ETHER_MAX_LEN)
3737

38+
# Our default RSS configuration is RTE_ETH_RSS_IP.
39+
# Intel X710 (i40e) does not work reliably with it, so for that driver we default to the RSS offloads reported by the NIC/driver.
40+
# Set this explicitly (an integer bitmask of RSS offload flags) to override the default RSS configuration.
41+
rss_offload: null
42+
3843
# Storage configuration (storage)
3944
storage:
4045
cache:

init/schema.json

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,11 +93,11 @@
9393
"ndp": {
9494
"type": "object",
9595
"properties": {
96-
"device": {
96+
"device": {
9797
"type": "array",
9898
"items": {
9999
"type": "string"
100-
}
100+
}
101101
},
102102
"queues": {
103103
"type": "string"
@@ -182,6 +182,12 @@
182182
"string",
183183
"null"
184184
]
185+
},
186+
"rss_offload": {
187+
"type": [
188+
"integer",
189+
"null"
190+
]
185191
}
186192
},
187193
"required": [

src/plugins/input/dpdk/README.md

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ input_plugin:
1717
workers_cpu_list: []
1818
eal_opts: null
1919
mtu: 1518
20+
rss_offload: null
2021
```
2122
2223
## Parameters
@@ -36,6 +37,7 @@ input_plugin:
3637
|__workers_cpu_list__| [] (autofill) | List of CPU cores assigned to RX queues (must match number of rx_queues) |
3738
|__eal_opts__ | null | Extra options to be passed to the DPDK EAL (Environment Abstraction Layer). Can be used for fine-tuning DPDK behavior.|
3839
|__mtu__ | 1518 | Maximum Transmission Unit size for the interface. Defines the maximum packet size that can be received.|
40+
|__rss_offload__ | null | RSS offload bitmask (integer). When null, the default RSS offload configuration is used; set it explicitly to override that default.|
3941

4042
## How to use
4143

@@ -240,9 +242,12 @@ grubby --update-kernel ALL --args "isolcpus=2-19,22-39"
240242
```
241243
242244
243-
### 4. Validate with dpdk-testpmd
245+
### 4. Troubleshooting
244246
245-
TODO
247+
⚠️ RSS on Intel X710 (i40e)
248+
249+
We observed that RSS on Intel X710 (i40e) may not distribute packets across multiple RX queues with the default RTE_ETH_RSS_IP.
250+
For X710 (i40e) we therefore use the full set of RSS offloads reported by the driver. If you experience similar issues with another NIC, try setting `rss_offload` explicitly to override the default RSS offload configuration.
246251
247252
## FAQ
248253

src/plugins/input/dpdk/src/dpdk.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,12 +86,14 @@ void DpdkCore::configure(const char* params)
8686
uint16_t rxQueueCount = parser.rx_queues();
8787
m_mBufsCount = parser.pkt_buffer_size();
8888
uint16_t mtuSize = parser.mtu_size();
89+
uint64_t rssOffload = parser.rss_offload();
8990

9091
configureEal(parser.eal_params());
9192

9293
m_dpdkDevices.reserve(parser.port_numbers().size());
9394
for (auto portID : parser.port_numbers()) {
94-
m_dpdkDevices.emplace_back(portID, rxQueueCount, mempoolSize, m_mBufsCount, mtuSize);
95+
m_dpdkDevices
96+
.emplace_back(portID, rxQueueCount, mempoolSize, m_mBufsCount, mtuSize, rssOffload);
9597
}
9698

9799
isConfigured = true;

src/plugins/input/dpdk/src/dpdk.hpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ class DpdkOptParser : public OptionsParser {
3434
uint16_t rx_queues_ = 1;
3535
std::string eal_;
3636
uint16_t mtu_;
37+
uint64_t rss_offload_ = 0;
3738

3839
std::vector<uint16_t> parsePortNumbers(std::string arg)
3940
{
@@ -123,6 +124,20 @@ class DpdkOptParser : public OptionsParser {
123124
return true;
124125
},
125126
RequiredArgument);
127+
register_option(
128+
"r",
129+
"rss",
130+
"VALUE",
131+
"RSS offload value. Default: 0",
132+
[this](const char* arg) {
133+
try {
134+
rss_offload_ = str2num<decltype(rss_offload_)>(arg);
135+
} catch (std::invalid_argument&) {
136+
return false;
137+
}
138+
return true;
139+
},
140+
RequiredArgument);
126141
register_option(
127142
"e",
128143
"eal",
@@ -160,6 +175,8 @@ class DpdkOptParser : public OptionsParser {
160175
uint16_t rx_queues() const { return rx_queues_; }
161176

162177
uint16_t mtu_size() const { return mtu_; }
178+
179+
uint64_t rss_offload() const { return rss_offload_; }
163180
};
164181

165182
class DpdkCore {

src/plugins/input/dpdk/src/dpdkDevice.cpp

Lines changed: 35 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,8 @@ DpdkDevice::DpdkDevice(
4242
uint16_t rxQueueCount,
4343
uint16_t memPoolSize,
4444
uint16_t mbufsCount,
45-
uint16_t mtuSize)
45+
uint16_t mtuSize,
46+
uint64_t rssOffload)
4647
: m_portID(portID)
4748
, m_rxQueueCount(rxQueueCount)
4849
, m_txQueueCount(0)
@@ -51,13 +52,13 @@ DpdkDevice::DpdkDevice(
5152
, m_supportedRSS(false)
5253
, m_supportedHWTimestamp(false)
5354
, m_mtuSize(mtuSize)
55+
, m_rssOffload(rssOffload)
5456
{
5557
validatePort();
5658
recognizeDriver();
5759
configurePort();
5860
initMemPools(memPoolSize);
5961
setupRxQueues(memPoolSize);
60-
configureRSS();
6162
enablePort();
6263
}
6364

@@ -95,9 +96,13 @@ void DpdkDevice::recognizeDriver()
9596
std::cerr << "\tflow type RSS offloads: " << rteDevInfo.flow_type_rss_offloads << std::endl;
9697

9798
/* Check if RSS hashing is supported in NIC */
98-
m_supportedRSS = (rteDevInfo.flow_type_rss_offloads & RTE_ETH_RSS_IP) != 0;
99-
std::cerr << "\tDetected RSS offload capability: " << (m_supportedRSS ? "yes" : "no")
100-
<< std::endl;
99+
if (m_rxQueueCount > 1) {
100+
m_supportedRSS = (rteDevInfo.flow_type_rss_offloads & RTE_ETH_RSS_IP) != 0;
101+
std::cerr << "\tDetected RSS offload capability: " << (m_supportedRSS ? "yes" : "no")
102+
<< std::endl;
103+
} else {
104+
m_supportedRSS = false;
105+
}
101106

102107
/* Check if HW timestamps are supported, we support NFB cards only */
103108
if (m_isNfbDpdkDriver) {
@@ -154,7 +159,9 @@ rte_eth_conf DpdkDevice::createPortConfig()
154159

155160
if (m_supportedRSS) {
156161
portConfig.rxmode.mq_mode = RTE_ETH_MQ_RX_RSS;
162+
portConfig.rx_adv_conf.rss_conf = createRSSConfig();
157163
} else {
164+
std::cerr << "Skipped RSS hash setting for port " << m_portID << "." << std::endl;
158165
portConfig.rxmode.mq_mode = RTE_ETH_MQ_RX_NONE;
159166
}
160167

@@ -220,12 +227,9 @@ void DpdkDevice::setupRxQueues(uint16_t memPoolSize)
220227
<< " set up. Size of each queue: " << rxQueueSize << std::endl;
221228
}
222229

223-
void DpdkDevice::configureRSS()
230+
rte_eth_rss_conf DpdkDevice::createRSSConfig()
224231
{
225-
if (!m_supportedRSS) {
226-
std::cerr << "Skipped RSS hash setting for port " << m_portID << "." << std::endl;
227-
return;
228-
}
232+
struct rte_eth_rss_conf rssConfig = {};
229233

230234
rte_eth_dev_info rteDevInfo;
231235
if (rte_eth_dev_info_get(m_portID, &rteDevInfo)) {
@@ -243,23 +247,32 @@ void DpdkDevice::configureRSS()
243247
return hashKey[idx++ % sizeof(hashKey)];
244248
});
245249

246-
const uint64_t rssOffloads = rteDevInfo.flow_type_rss_offloads & RTE_ETH_RSS_IP;
247-
if (rssOffloads != RTE_ETH_RSS_IP) {
248-
std::cerr << "RTE_ETH_RSS_IP is not supported by the card. Used subset: " << rssOffloads
249-
<< std::endl;
250+
uint64_t rssOffloads = 0;
251+
if (m_rssOffload) { // user specified RSS offload
252+
rssOffloads = m_rssOffload;
253+
} else {
254+
if (std::string(rteDevInfo.driver_name) == "net_i40e") {
255+
std::cerr << "RTE_ETH_RSS_IP is not supported reliably by this driver, falling back to "
256+
"NIC-provided RSS: "
257+
<< rteDevInfo.flow_type_rss_offloads << std::endl;
258+
std::cerr << "You can override this behavior using the 'rss' configuration parameter."
259+
<< std::endl;
260+
rssOffloads = rteDevInfo.flow_type_rss_offloads;
261+
} else {
262+
rssOffloads = rteDevInfo.flow_type_rss_offloads & RTE_ETH_RSS_IP;
263+
if (rssOffloads != RTE_ETH_RSS_IP) {
264+
std::cerr << "RTE_ETH_RSS_IP is not supported by the card. Used subset: "
265+
<< rssOffloads << std::endl;
266+
}
267+
}
250268
}
251269

252-
struct rte_eth_rss_conf rssConfig = {};
270+
std::cerr << "Using RSS offloads: " << rssOffloads << std::endl;
271+
253272
rssConfig.rss_key = m_hashKey.data();
254273
rssConfig.rss_key_len = rssHashKeySize;
255274
rssConfig.rss_hf = rssOffloads;
256-
257-
int ret = rte_eth_dev_rss_hash_update(m_portID, &rssConfig);
258-
if (ret < 0) {
259-
std::cerr << "Setting RSS {" << rssOffloads << "} for port " << m_portID
260-
<< " failed. Errno:" << ret << std::endl;
261-
throw PluginError("DpdkDevice::configureRSS() has failed.");
262-
}
275+
return rssConfig;
263276
}
264277

265278
void DpdkDevice::enablePort()

src/plugins/input/dpdk/src/dpdkDevice.hpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,13 +48,15 @@ class DpdkDevice {
4848
* @param memPoolSize The size of the memory pool for packet buffers.
4949
* @param mbufsCount The number of mbufs (packet buffers) to be allocated.
5050
* @param mtuSize Maximum transmission unit of input interface.
51+
* @param rssOffload RSS offload value. 0 selects the default: the supported subset of RTE_ETH_RSS_IP, or the driver-reported offloads for net_i40e.
5152
*/
5253
DpdkDevice(
5354
uint16_t portID,
5455
uint16_t rxQueueCount,
5556
uint16_t memPoolSize,
5657
uint16_t mbufsCount,
57-
uint16_t mtuSize);
58+
uint16_t mtuSize,
59+
uint64_t rssOffload);
5860

5961
/**
6062
* @brief Receives packets from the specified receive queue of the DPDK device.
@@ -84,7 +86,7 @@ class DpdkDevice {
8486
rte_eth_conf createPortConfig();
8587
void initMemPools(uint16_t memPoolSize);
8688
void setupRxQueues(uint16_t memPoolSize);
87-
void configureRSS();
89+
rte_eth_rss_conf createRSSConfig();
8890
void enablePort();
8991
void createRteMempool(uint16_t mempoolSize);
9092
void setRxTimestampDynflag();
@@ -102,6 +104,7 @@ class DpdkDevice {
102104
int m_rxTimestampOffset;
103105
int m_rxTimestampDynflag;
104106
uint16_t m_mtuSize;
107+
uint64_t m_rssOffload = 0;
105108
};
106109

107110
} // namespace ipxp

0 commit comments

Comments
 (0)