root@06n01:~# mlnx_qos -i enp216s0f1
DCBX mode: OS controlled
Priority trust state: pcp
Receive buffer size (bytes): 262016,0,0,0,0,0,0,0,
Cable len: 7
PFC configuration:
priority 0 1 2 3 4 5 6 7
enabled 0 0 0 0 0 0 0 0
buffer 0 0 0 0 0 0 0 0
tc: 0 ratelimit: unlimited, tsa: vendor
priority: 1
tc: 1 ratelimit: unlimited, tsa: vendor
priority: 0
tc: 2 ratelimit: unlimited, tsa: vendor
priority: 2
tc: 3 ratelimit: unlimited, tsa: vendor
priority: 3
tc: 4 ratelimit: unlimited, tsa: vendor
priority: 4
tc: 5 ratelimit: unlimited, tsa: vendor
priority: 5
tc: 6 ratelimit: unlimited, tsa: vendor
priority: 6
tc: 7 ratelimit: unlimited, tsa: vendor
priority: 7
root@06n01:~#
步骤 1:网卡开启使用 DSCP 的流控方式
root:~# mlnx_qos -i <interface> --trust dscp
mlnx_qos -i enp216s0f1 --trust dscp
root@06n01:~# mlnx_qos -i enp216s0f0 --trust dscp
DCBX mode: OS controlled
Priority trust state: dscp
dscp2prio mapping:
prio:0 dscp:07,06,05,04,03,02,01,00,
prio:1 dscp:15,14,13,12,11,10,09,08,
prio:2 dscp:23,22,21,20,19,18,17,16,
prio:3 dscp:31,30,29,28,27,26,25,24,
prio:4 dscp:39,38,37,36,35,34,33,32,
prio:5 dscp:47,46,45,44,43,42,41,40,
prio:6 dscp:55,54,53,52,51,50,49,48,
prio:7 dscp:63,62,61,60,59,58,57,56,
Receive buffer size (bytes): 262016,0,0,0,0,0,0,0,
Cable len: 7
PFC configuration:
priority 0 1 2 3 4 5 6 7
enabled 0 0 0 0 0 0 0 0
buffer 0 0 0 0 0 0 0 0
tc: 0 ratelimit: unlimited, tsa: vendor
priority: 1
tc: 1 ratelimit: unlimited, tsa: vendor
priority: 0
tc: 2 ratelimit: unlimited, tsa: vendor
priority: 2
tc: 3 ratelimit: unlimited, tsa: vendor
priority: 3
tc: 4 ratelimit: unlimited, tsa: vendor
priority: 4
tc: 5 ratelimit: unlimited, tsa: vendor
priority: 5
tc: 6 ratelimit: unlimited, tsa: vendor
priority: 6
tc: 7 ratelimit: unlimited, tsa: vendor
priority: 7
root@06n01:~#
步骤 2:把所有 RoCE 流量的 DSCP 值设置为 26,对应 ToS 值 106(二进制 01101010,高 6 位 011010 即 DSCP 26,低 2 位 10 为 ECN 标志位);在交换机上也需要针对 DSCP 26 进行对应的设置:
root:~# echo 106 > /sys/class/infiniband/<mlx-device>/tc/1/traffic_class
echo 106 > /sys/class/infiniband/mlx5_1/tc/1/traffic_class
root@06n01:~# cat /sys/class/infiniband/mlx5_0/tc/1/traffic_class
root@06n01:~# echo 106 > /sys/class/infiniband/mlx5_0/tc/1/traffic_class
root@06n01:~# cat /sys/class/infiniband/mlx5_0/tc/1/traffic_class
Global tclass=106
root@06n01:~#
查询 mlx 网卡数量
ls /sys/class/infiniband/
root@06n01:~# ls /sys/class/infiniband/
mlx5_0 mlx5_1 mlx5_2 mlx5_3
for i in `cat rdma_ip_list`; do echo $i; ssh $i "ls /sys/class/infiniband/" ;done;
查询 mlx 网卡对应的系统网卡名称
ls /sys/class/infiniband/mlx*/device/net/
root@06n01:~# ls /sys/class/infiniband/mlx*/device/net/
/sys/class/infiniband/mlx5_0/device/net/:
enp216s0f0
/sys/class/infiniband/mlx5_1/device/net/:
enp216s0f1
/sys/class/infiniband/mlx5_2/device/net/:
ens4f0
/sys/class/infiniband/mlx5_3/device/net/:
ens4f1
步骤 3:将 rdma connection manager 的优先级也设置为 DSCP 26
root:~# cma_roce_tos -d <mlx-device> -t 106
cma_roce_tos -d mlx5_1 -t 106
root@06n01:~# cma_roce_tos -d mlx5_0
0
root@06n01:~# cma_roce_tos -d mlx5_0 -t 106
106
root@06n01:~# cma_roce_tos -d mlx5_0
106
root@06n01:~#
步骤 4:开启 ECN:
sysctl -w net.ipv4.tcp_ecn=1
root@06n01:~# sysctl net.ipv4.tcp_ecn
net.ipv4.tcp_ecn = 2
root@06n01:~# sysctl -w net.ipv4.tcp_ecn=1
net.ipv4.tcp_ecn = 1
root@06n01:~# sysctl net.ipv4.tcp_ecn
net.ipv4.tcp_ecn = 1
root@06n01:~#
备注:
以上配置在系统重启后会丢失,因此每次启动机器后需要重新配置以上参数。用户可将以上配置置于 /etc/rc.local 中,重启后无需重新操作配置。
步骤 5:priority 3 里面开启使用 DCQCN:
查看网卡名称
ls /sys/class/net/
root@06n01:~# ls /sys/class/net/
bond0 bonding_masters docker0 eno1 eno2 enp216s0f0 enp216s0f1 ens4f0 ens4f1 ens5f0 ens5f1 ens6f0 ens6f1 lo
root@06n01:~#
# 设置 DCQCN
echo 1 > /sys/class/net/enp216s0f1/ecn/roce_np/enable/3
cat /sys/class/net/enp216s0f0/ecn/roce_np/enable/3
root@06n01:~# cat /sys/class/net/enp216s0f0/ecn/roce_np/enable/3
1
root@06n01:~#
步骤 6:配置 CNP 报文使用的 DSCP 值为 48(按 dscp2prio 映射属于 priority 6):
查看各网卡当前的 cnp_dscp 配置
ls -l /sys/class/net/*/ecn/roce_np/cnp_dscp
cat /sys/class/net/*/ecn/roce_np/cnp_dscp
配置 CNP 使用 DSCP
echo 48 > /sys/class/net/enp216s0f1/ecn/roce_np/cnp_dscp
root@06n01:~# cat /sys/class/net/*/ecn/roce_np/cnp_dscp
48
48
48
48
root@06n01:~#
步骤 7:在网口开启 PFC,使用 mlnx_qos 工具:
root:~# mlnx_qos -i <interface> --trust=dscp --pfc 0,0,0,1,0,0,1,0
mlnx_qos -i enp216s0f1 --trust=dscp --pfc 0,0,0,1,0,0,1,0
root@06n01:~# mlnx_qos -i enp216s0f0 --trust=dscp --pfc 0,0,0,1,0,0,1,0
DCBX mode: OS controlled
Priority trust state: dscp
dscp2prio mapping:
prio:0 dscp:07,06,05,04,03,02,01,00,
prio:1 dscp:15,14,13,12,11,10,09,08,
prio:2 dscp:23,22,21,20,19,18,17,16,
prio:3 dscp:31,30,29,28,27,26,25,24,
prio:4 dscp:39,38,37,36,35,34,33,32,
prio:5 dscp:47,46,45,44,43,42,41,40,
prio:6 dscp:55,54,53,52,51,50,49,48,
prio:7 dscp:63,62,61,60,59,58,57,56,
Receive buffer size (bytes): 130944,130944,0,0,0,0,0,0,
Cable len: 7
PFC configuration:
priority 0 1 2 3 4 5 6 7
enabled 0 0 0 1 0 0 1 0
buffer 0 0 0 1 0 0 1 0
tc: 0 ratelimit: unlimited, tsa: vendor
priority: 1
tc: 1 ratelimit: unlimited, tsa: vendor
priority: 0
tc: 2 ratelimit: unlimited, tsa: vendor
priority: 2
tc: 3 ratelimit: unlimited, tsa: vendor
priority: 3
tc: 4 ratelimit: unlimited, tsa: vendor
priority: 4
tc: 5 ratelimit: unlimited, tsa: vendor
priority: 5
tc: 6 ratelimit: unlimited, tsa: vendor
priority: 6
tc: 7 ratelimit: unlimited, tsa: vendor
priority: 7
root@06n01:~#
备注:
以上配置在系统重启后会丢失,因此每次启动机器后需要重新配置以上参数。用户可将以上配置置于 /etc/rc.local 中,重启后无需重新操作配置。
流控配置模版
####### NIC flow-control configuration template #######
# Intended to be placed in /etc/rc.local so the settings are re-applied after
# every reboot (they do not persist on their own).
# Adjust the interface names (ens3f0/ens3f1) and the matching RDMA device
# names (mlx5_0/mlx5_1) to the target host before use.

#### Enable ECN
# Print the value before and after the change so the boot log shows both.
sysctl net.ipv4.tcp_ecn
sysctl -w net.ipv4.tcp_ecn=1
sysctl net.ipv4.tcp_ecn

###### NIC ######
### add for rdma

### add for ens3f0
# Show the current QoS state, then switch the port to DSCP trust mode.
mlnx_qos -i ens3f0
mlnx_qos -i ens3f0 --trust dscp
# ToS 106 = DSCP 26 (upper 6 bits) for RoCE traffic and for RDMA CM.
echo 106 > /sys/class/infiniband/mlx5_0/tc/1/traffic_class
cma_roce_tos -d mlx5_0 -t 106
# Enable the roce_np ECN knob for priority 3 and send CNPs with DSCP 48.
echo 1 > /sys/class/net/ens3f0/ecn/roce_np/enable/3
echo 48 > /sys/class/net/ens3f0/ecn/roce_np/cnp_dscp
# Enable PFC on priorities 3 and 6.
mlnx_qos -i ens3f0 --trust=dscp --pfc 0,0,0,1,0,0,1,0

### add for ens3f1
# Same sequence as above for the second port.
mlnx_qos -i ens3f1
mlnx_qos -i ens3f1 --trust dscp
echo 106 > /sys/class/infiniband/mlx5_1/tc/1/traffic_class
cma_roce_tos -d mlx5_1 -t 106
echo 1 > /sys/class/net/ens3f1/ecn/roce_np/enable/3
echo 48 > /sys/class/net/ens3f1/ecn/roce_np/cnp_dscp
mlnx_qos -i ens3f1 --trust=dscp --pfc 0,0,0,1,0,0,1,0