服务器开启RDMA

主机网卡配置前查询

root@06n01:~# mlnx_qos -i enp216s0f1
DCBX mode: OS controlled
Priority trust state: pcp
Receive buffer size (bytes): 262016,0,0,0,0,0,0,0,
Cable len: 7
PFC configuration:
priority    0   1   2   3   4   5   6   7
enabled     0   0   0   0   0   0   0   0
buffer      0   0   0   0   0   0   0   0
tc: 0 ratelimit: unlimited, tsa: vendor
 priority:  1
tc: 1 ratelimit: unlimited, tsa: vendor
 priority:  0
tc: 2 ratelimit: unlimited, tsa: vendor
 priority:  2
tc: 3 ratelimit: unlimited, tsa: vendor
 priority:  3
tc: 4 ratelimit: unlimited, tsa: vendor
 priority:  4
tc: 5 ratelimit: unlimited, tsa: vendor
 priority:  5
tc: 6 ratelimit: unlimited, tsa: vendor
 priority:  6
tc: 7 ratelimit: unlimited, tsa: vendor
 priority:  7
root@06n01:~#

步骤 1:网卡开启基于 DSCP 的流控方式(将 trust 模式由 pcp 改为 dscp)

root:~# mlnx_qos -i <interface> --trust dscp

mlnx_qos -i enp216s0f1 --trust dscp

root@06n01:~# mlnx_qos -i enp216s0f0 --trust dscp
DCBX mode: OS controlled
Priority trust state: dscp
dscp2prio mapping:
prio:0 dscp:07,06,05,04,03,02,01,00,
prio:1 dscp:15,14,13,12,11,10,09,08,
prio:2 dscp:23,22,21,20,19,18,17,16,
prio:3 dscp:31,30,29,28,27,26,25,24,
prio:4 dscp:39,38,37,36,35,34,33,32,
prio:5 dscp:47,46,45,44,43,42,41,40,
prio:6 dscp:55,54,53,52,51,50,49,48,
prio:7 dscp:63,62,61,60,59,58,57,56,
Receive buffer size (bytes): 262016,0,0,0,0,0,0,0,
Cable len: 7
PFC configuration:
priority    0   1   2   3   4   5   6   7
enabled     0   0   0   0   0   0   0   0
buffer      0   0   0   0   0   0   0   0
tc: 0 ratelimit: unlimited, tsa: vendor
 priority:  1
tc: 1 ratelimit: unlimited, tsa: vendor
 priority:  0
tc: 2 ratelimit: unlimited, tsa: vendor
 priority:  2
tc: 3 ratelimit: unlimited, tsa: vendor
 priority:  3
tc: 4 ratelimit: unlimited, tsa: vendor
 priority:  4
tc: 5 ratelimit: unlimited, tsa: vendor
 priority:  5
tc: 6 ratelimit: unlimited, tsa: vendor
 priority:  6
tc: 7 ratelimit: unlimited, tsa: vendor
 priority:  7
root@06n01:~#

步骤 2:把所有 RoCE 流量的 DSCP 值设置为 26,对应 ToS 值 106(二进制 01101010,DSCP 取高 6 位,即 011010 = 26)。交换机上也需要针对 DSCP 26 进行对应的设置:

root:~# echo 106 > /sys/class/infiniband/<mlx-device>/tc/1/traffic_class

echo 106 > /sys/class/infiniband/mlx5_1/tc/1/traffic_class

root@06n01:~# cat  /sys/class/infiniband/mlx5_0/tc/1/traffic_class
root@06n01:~# echo 106 > /sys/class/infiniband/mlx5_0/tc/1/traffic_class
root@06n01:~# cat  /sys/class/infiniband/mlx5_0/tc/1/traffic_class
Global tclass=106
root@06n01:~#

查询 mlx 网卡数量

ls /sys/class/infiniband/
root@06n01:~# ls /sys/class/infiniband/
mlx5_0  mlx5_1  mlx5_2  mlx5_3


for i in `cat rdma_ip_list`; do  echo $i; ssh $i "ls /sys/class/infiniband/" ;done;

查询 mlx 网卡对应的系统网卡名称

ls /sys/class/infiniband/mlx*/device/net/

root@06n01:~# ls /sys/class/infiniband/mlx*/device/net/
/sys/class/infiniband/mlx5_0/device/net/:
enp216s0f0

/sys/class/infiniband/mlx5_1/device/net/:
enp216s0f1

/sys/class/infiniband/mlx5_2/device/net/:
ens4f0

/sys/class/infiniband/mlx5_3/device/net/:
ens4f1

步骤 3:将 rdma connection manager 的优先级也设置为 DSCP 26

root:~# cma_roce_tos -d <mlx-device> -t 106

cma_roce_tos -d mlx5_1 -t 106

root@06n01:~# cma_roce_tos -d mlx5_0
0
root@06n01:~# cma_roce_tos -d mlx5_0 -t 106
106
root@06n01:~# cma_roce_tos -d mlx5_0
106
root@06n01:~#


步骤 4:开启 ECN:

sysctl -w net.ipv4.tcp_ecn=1

root@06n01:~# sysctl  net.ipv4.tcp_ecn
net.ipv4.tcp_ecn = 2
root@06n01:~# sysctl -w net.ipv4.tcp_ecn=1
net.ipv4.tcp_ecn = 1
root@06n01:~# sysctl  net.ipv4.tcp_ecn
net.ipv4.tcp_ecn = 1
root@06n01:~#

备注:

以上配置在系统重启后会丢失,因此每次启动机器后需要重新配置以上参数。用户可将以上配置写入 /etc/rc.local,这样重启后无需再手动重新配置。


步骤 5:在 priority 3 上开启 DCQCN:

查看网卡名称 
ls /sys/class/net/
root@06n01:~# ls /sys/class/net/
bond0  bonding_masters  docker0  eno1  eno2  enp216s0f0  enp216s0f1  ens4f0  ens4f1  ens5f0  ens5f1  ens6f0  ens6f1  lo
root@06n01:~#

# 设置 DCQCN
echo 1 > /sys/class/net/enp216s0f1/ecn/roce_np/enable/3

cat /sys/class/net/enp216s0f0/ecn/roce_np/enable/3
root@06n01:~# cat /sys/class/net/enp216s0f0/ecn/roce_np/enable/3
1
root@06n01:~#

步骤 6:配置 CNP 报文使用 DSCP 48(对应 priority 6):

查看各网卡当前的 cnp_dscp 配置
ls -l  /sys/class/net/*/ecn/roce_np/cnp_dscp
cat /sys/class/net/*/ecn/roce_np/cnp_dscp

配置 CNP 使用 DSCP
echo 48 > /sys/class/net/enp216s0f1/ecn/roce_np/cnp_dscp

root@06n01:~# cat /sys/class/net/*/ecn/roce_np/cnp_dscp
48
48
48
48
root@06n01:~#

步骤 7:在网口开启 PFC,使用 mlnx_qos 工具:

root:~# mlnx_qos -i <interface> --trust=dscp --pfc 0,0,0,1,0,0,1,0

mlnx_qos -i enp216s0f1 --trust=dscp --pfc 0,0,0,1,0,0,1,0


root@06n01:~# mlnx_qos -i enp216s0f0 --trust=dscp --pfc 0,0,0,1,0,0,1,0
DCBX mode: OS controlled
Priority trust state: dscp
dscp2prio mapping:
prio:0 dscp:07,06,05,04,03,02,01,00,
prio:1 dscp:15,14,13,12,11,10,09,08,
prio:2 dscp:23,22,21,20,19,18,17,16,
prio:3 dscp:31,30,29,28,27,26,25,24,
prio:4 dscp:39,38,37,36,35,34,33,32,
prio:5 dscp:47,46,45,44,43,42,41,40,
prio:6 dscp:55,54,53,52,51,50,49,48,
prio:7 dscp:63,62,61,60,59,58,57,56,
Receive buffer size (bytes): 130944,130944,0,0,0,0,0,0,
Cable len: 7
PFC configuration:
priority    0   1   2   3   4   5   6   7
enabled     0   0   0   1   0   0   1   0
buffer      0   0   0   1   0   0   1   0
tc: 0 ratelimit: unlimited, tsa: vendor
 priority:  1
tc: 1 ratelimit: unlimited, tsa: vendor
 priority:  0
tc: 2 ratelimit: unlimited, tsa: vendor
 priority:  2
tc: 3 ratelimit: unlimited, tsa: vendor
 priority:  3
tc: 4 ratelimit: unlimited, tsa: vendor
 priority:  4
tc: 5 ratelimit: unlimited, tsa: vendor
 priority:  5
tc: 6 ratelimit: unlimited, tsa: vendor
 priority:  6
tc: 7 ratelimit: unlimited, tsa: vendor
 priority:  7
root@06n01:~#

备注:

以上配置在系统重启后会丢失,因此每次启动机器后需要重新配置以上参数。用户可将以上配置写入 /etc/rc.local,这样重启后无需再手动重新配置。


流控配置模版

####### NIC flow-control (RoCE QoS) configuration template #######
# Replays the manual steps described earlier in these notes so they can be
# re-applied after a reboot (for example from /etc/rc.local).
# Edit the interface / RDMA device pairs at the bottom to match the host.

#### Enable ECN (print the value before and after the change)
sysctl  net.ipv4.tcp_ecn
sysctl -w net.ipv4.tcp_ecn=1
sysctl  net.ipv4.tcp_ecn

# Apply the RDMA flow-control settings to a single port.
#   $1 - network interface name, e.g. ens3f0
#   $2 - RDMA device name under /sys/class/infiniband, e.g. mlx5_0
configure_rdma_port() {
  # Print the current QoS state, then switch the trust mode to DSCP.
  mlnx_qos -i "$1"
  mlnx_qos -i "$1" --trust dscp

  # Traffic class 106 for RoCE traffic, and the same ToS for the RDMA
  # connection manager.
  echo 106 > "/sys/class/infiniband/$2/tc/1/traffic_class"
  cma_roce_tos -d "$2" -t 106

  # roce_np enable flag for priority 3, and DSCP 48 for CNP packets.
  echo 1 > "/sys/class/net/$1/ecn/roce_np/enable/3"
  echo 48 > "/sys/class/net/$1/ecn/roce_np/cnp_dscp"

  # Turn on PFC for priorities 3 and 6 only.
  mlnx_qos -i "$1" --trust=dscp --pfc 0,0,0,1,0,0,1,0
}

###### NICs ######
### RDMA ports: <interface> <mlx-device>
configure_rdma_port ens3f0 mlx5_0
configure_rdma_port ens3f1 mlx5_1