1 另一个复现问题的方式
首先构造如下 msleep.sh 脚本,用于触发并复现该 bug:
#!/bin/sh
# msleep.sh - short-lived workload: rewrite tmp.txt 10000 times, then sleep
# for one second and exit.
pass=1
while [ "$pass" -le 10000 ]
do
  # Every pass truncates tmp.txt (in the current directory) and writes the
  # same single line again.
  echo "aaaaaaaaaaaaaa" > tmp.txt
  pass=$((pass + 1))
done
sleep 1
接着是 usetup.sh,用于清理测试:
#!/bin/sh
# usetup.sh - tear down the test: signal every process named "stress", then
# remove the test cgroup directories, children first and the parent last.
killall stress

cg_parent=/sys/fs/cgroup/cpu,cpuacct/zy_test_l1_32

# NOTE(review): killall only sends the signal; if a task is still attached to
# one of these cgroups when rmdir runs, the removal presumably fails - confirm
# whether a wait/retry is wanted here.
for cg_dir in \
  "$cg_parent/zy_test_l2_1024" \
  "$cg_parent/zy_test_l2_32" \
  "$cg_parent"
do
  rmdir "$cg_dir"
done
最后是 setup.sh,执行测试程序:
#!/bin/sh
# setup.sh - build the test cgroups, start the workloads and monitor the
# scheduler statistics.

# handle_ctrl_c: SIGINT handler.
# Prints a banner, runs ./usetup.sh (resolved relative to the current working
# directory) to clean up, then terminates the script with exit status 1.
#
# Declared with the POSIX "name() { ...; }" form: the "function name()"
# spelling is a bash/ksh extension and is a syntax error in strict POSIX
# shells such as dash, which is what /bin/sh is on many systems.
handle_ctrl_c() {
  echo "*************************"
  echo " do usetup, exit"
  echo "*************************"
  ./usetup.sh
  exit 1
}

# POSIX trap takes the signal name without the "SIG" prefix; "SIGINT" is
# rejected by some /bin/sh implementations.
trap handle_ctrl_c INT
# Build the cgroup-v1 cpu,cpuacct test hierarchy and start the two workloads:
#   zy_test_l1_32                   cpu.shares = 32
#   zy_test_l1_32/zy_test_l2_32     cpu.shares = 32   -> stress worker
#   zy_test_l1_32/zy_test_l2_1024   cpu.shares is never written here (the
#                                   original note lists 1024, presumably the
#                                   kernel default)   -> msleep.sh
cg_l1=/sys/fs/cgroup/cpu,cpuacct/zy_test_l1_32

mkdir "$cg_l1"
echo 32 > "$cg_l1/cpu.shares"
mkdir "$cg_l1/zy_test_l2_32"
echo 32 > "$cg_l1/zy_test_l2_32/cpu.shares"
mkdir "$cg_l1/zy_test_l2_1024"

# Start one CPU hog pinned to CPU 10 under SCHED_BATCH.
# taskset and chrt exec the command they are given, so $! is expected to be
# the PID of the stress parent process; its forked child does the spinning.
taskset -c 10 chrt -b 0 stress -c 1 -t 1000 &
stress_pid=$!

# Look the worker up as a child of that parent instead of running a
# system-wide "pgrep stress" right after backgrounding: that lookup can run
# before the worker has been forked (yielding an empty PID) and also matches
# unrelated processes whose name contains "stress". Poll for up to ~5 seconds.
# (Fractional sleep requires a sleep(1) that accepts it, e.g. GNU coreutils.)
worker_pid=
attempts=0
while [ -z "$worker_pid" ] && [ "$attempts" -lt 50 ]; do
  worker_pid=$(pgrep -P "$stress_pid" | head -n 1)
  if [ -z "$worker_pid" ]; then
    sleep 0.1
  fi
  attempts=$((attempts + 1))
done
if [ -z "$worker_pid" ]; then
  echo "setup.sh: could not find the stress worker process" >&2
  ./usetup.sh
  exit 1
fi
echo "$worker_pid" > "$cg_l1/zy_test_l2_32/tasks"

# Start the short-lived writer and move it into the second child cgroup.
./msleep.sh &
sleep_pid=$!
echo "$sleep_pid" > "$cg_l1/zy_test_l2_1024/tasks"
#sleep 1.5
#echo $sleep_pid > /sys/fs/cgroup/cpuset/zy_test/tasks
# Monitoring loop: 1000 rounds, one per second. Each round prints a
# timestamped banner, then the cfs_rq entries of /proc/sched_debug whose path
# contains "zy", reduced to the cfs_rq header lines and the tg_load_avg*
# fields.
round=1
while [ "$round" -le 1000 ]; do
  echo "===================================$(date)============================================"
  grep -A 30 "cfs_rq.*zy" /proc/sched_debug | grep -E "zy|tg_load_avg"
  sleep 1
  round=$((round + 1))
done
接着开始测试,检查输出:
下面是测试中正常程序的输出:
# ./setup.sh
stress: info: [64192] dispatching hogs: 1 cpu, 0 io, 0 vm, 0 hdd
===================================Mon Mar 20 14:27:08 CST 2023============================================
cfs_rq[10]:/zy_test_l1_32/zy_test_l2_32
.tg_load_avg_contrib : 982
.tg_load_avg : 982
cfs_rq[10]:/zy_test_l1_32
.tg_load_avg_contrib : 30
.tg_load_avg : 1039
cfs_rq[72]:/zy_test_l1_32/zy_test_l2_1024
.tg_load_avg_contrib : 1013
.tg_load_avg : 23301
cfs_rq[72]:/zy_test_l1_32
.tg_load_avg_contrib : 44
.tg_load_avg : 2656
cfs_rq[73]:/zy_test_l1_32/zy_test_l2_1024
.tg_load_avg_contrib : 1015
.tg_load_avg : 22288
cfs_rq[73]:/zy_test_l1_32
.tg_load_avg_contrib : 205
.tg_load_avg : 2755
cfs_rq[74]:/zy_test_l1_32/zy_test_l2_1024
.tg_load_avg_contrib : 0
.tg_load_avg : 22288
cfs_rq[74]:/zy_test_l1_32
.tg_load_avg_contrib : 200
.tg_load_avg : 2788
cfs_rq[75]:/zy_test_l1_32/zy_test_l2_1024
.tg_load_avg_contrib : 1013
.tg_load_avg : 22288
cfs_rq[75]:/zy_test_l1_32
.tg_load_avg_contrib : 314
.tg_load_avg : 2617
cfs_rq[76]:/zy_test_l1_32/zy_test_l2_1024
.tg_load_avg_contrib : 1013
.tg_load_avg : 22288
cfs_rq[76]:/zy_test_l1_32
.tg_load_avg_contrib : 315
.tg_load_avg : 2184
cfs_rq[77]:/zy_test_l1_32/zy_test_l2_1024
.tg_load_avg_contrib : 1013
.tg_load_avg : 22288
cfs_rq[77]:/zy_test_l1_32
.tg_load_avg_contrib : 37
.tg_load_avg : 1718
cfs_rq[79]:/zy_test_l1_32/zy_test_l2_1024
.tg_load_avg_contrib : 1013
.tg_load_avg : 23301
cfs_rq[79]:/zy_test_l1_32
.tg_load_avg_contrib : 37
.tg_load_avg : 1218
cfs_rq[80]:/zy_test_l1_32/zy_test_l2_1024
.tg_load_avg_contrib : 1013
.tg_load_avg : 23301
cfs_rq[80]:/zy_test_l1_32
.tg_load_avg_contrib : 38
.tg_load_avg : 1046
cfs_rq[81]:/zy_test_l1_32/zy_test_l2_1024
.tg_load_avg_contrib : 1013
.tg_load_avg : 23301
cfs_rq[81]:/zy_test_l1_32
.tg_load_avg_contrib : 38
.tg_load_avg : 962
cfs_rq[82]:/zy_test_l1_32/zy_test_l2_1024
.tg_load_avg_contrib : 1013
.tg_load_avg : 23301
cfs_rq[82]:/zy_test_l1_32
.tg_load_avg_contrib : 43
.tg_load_avg : 940
cfs_rq[83]:/zy_test_l1_32/zy_test_l2_1024
.tg_load_avg_contrib : 0
.tg_load_avg : 1013
cfs_rq[83]:/zy_test_l1_32
.tg_load_avg_contrib : 38
.tg_load_avg : 934
cfs_rq[84]:/zy_test_l1_32/zy_test_l2_1024
.tg_load_avg_contrib : 0
.tg_load_avg : 4052
cfs_rq[84]:/zy_test_l1_32
.tg_load_avg_contrib : 37
.tg_load_avg : 959
cfs_rq[85]:/zy_test_l1_32/zy_test_l2_1024
.tg_load_avg_contrib : 0
.tg_load_avg : 8104
cfs_rq[85]:/zy_test_l1_32
.tg_load_avg_contrib : 39
.tg_load_avg : 985
cfs_rq[86]:/zy_test_l1_32/zy_test_l2_1024
.tg_load_avg_contrib : 0
.tg_load_avg : 12156
cfs_rq[86]:/zy_test_l1_32
.tg_load_avg_contrib : 39
.tg_load_avg : 1001
cfs_rq[87]:/zy_test_l1_32/zy_test_l2_1024
.tg_load_avg_contrib : 1013
.tg_load_avg : 14182
cfs_rq[87]:/zy_test_l1_32
.tg_load_avg_contrib : 43
.tg_load_avg : 1013
cfs_rq[88]:/zy_test_l1_32/zy_test_l2_1024
.tg_load_avg_contrib : 1013
.tg_load_avg : 18234
cfs_rq[88]:/zy_test_l1_32
.tg_load_avg_contrib : 42
.tg_load_avg : 1026
cfs_rq[89]:/zy_test_l1_32/zy_test_l2_1024
.tg_load_avg_contrib : 1013
.tg_load_avg : 22286
cfs_rq[89]:/zy_test_l1_32
.tg_load_avg_contrib : 42
.tg_load_avg : 2008
cfs_rq[90]:/zy_test_l1_32/zy_test_l2_1024
.tg_load_avg_contrib : 1013
.tg_load_avg : 21273
cfs_rq[90]:/zy_test_l1_32
.tg_load_avg_contrib : 43
.tg_load_avg : 3315
cfs_rq[91]:/zy_test_l1_32/zy_test_l2_1024
.tg_load_avg_contrib : 1013
.tg_load_avg : 22286
cfs_rq[91]:/zy_test_l1_32
.tg_load_avg_contrib : 43
.tg_load_avg : 3929
cfs_rq[92]:/zy_test_l1_32/zy_test_l2_1024
.tg_load_avg_contrib : 1013
.tg_load_avg : 22286
cfs_rq[92]:/zy_test_l1_32
.tg_load_avg_contrib : 43
.tg_load_avg : 4085
cfs_rq[93]:/zy_test_l1_32/zy_test_l2_1024
.tg_load_avg_contrib : 1013
.tg_load_avg : 23299
cfs_rq[93]:/zy_test_l1_32
.tg_load_avg_contrib : 43
.tg_load_avg : 4287
cfs_rq[94]:/zy_test_l1_32/zy_test_l2_1024
.tg_load_avg_contrib : 1013
.tg_load_avg : 22286
cfs_rq[94]:/zy_test_l1_32
.tg_load_avg_contrib : 43
.tg_load_avg : 4393
cfs_rq[95]:/zy_test_l1_32/zy_test_l2_1024
.tg_load_avg_contrib : 1013
.tg_load_avg : 22286
cfs_rq[95]:/zy_test_l1_32
.tg_load_avg_contrib : 1015
.tg_load_avg : 4429
...
...
...
===================================Mon Mar 20 14:27:12 CST 2023============================================
cfs_rq[10]:/zy_test_l1_32/zy_test_l2_32
.tg_load_avg_contrib : 1014
.tg_load_avg : 1014
cfs_rq[10]:/zy_test_l1_32
.tg_load_avg_contrib : 31
.tg_load_avg : 356
cfs_rq[72]:/zy_test_l1_32/zy_test_l2_1024
.tg_load_avg_contrib : 0
.tg_load_avg : 437
cfs_rq[72]:/zy_test_l1_32
.tg_load_avg_contrib : 0
.tg_load_avg : 203
cfs_rq[73]:/zy_test_l1_32/zy_test_l2_1024
.tg_load_avg_contrib : 0
.tg_load_avg : 437
cfs_rq[73]:/zy_test_l1_32
.tg_load_avg_contrib : 0
.tg_load_avg : 203
cfs_rq[74]:/zy_test_l1_32/zy_test_l2_1024
.tg_load_avg_contrib : 0
.tg_load_avg : 437
cfs_rq[74]:/zy_test_l1_32
.tg_load_avg_contrib : 0
.tg_load_avg : 203
cfs_rq[75]:/zy_test_l1_32/zy_test_l2_1024
.tg_load_avg_contrib : 0
.tg_load_avg : 437
cfs_rq[75]:/zy_test_l1_32
.tg_load_avg_contrib : 0
.tg_load_avg : 203
cfs_rq[76]:/zy_test_l1_32/zy_test_l2_1024
.tg_load_avg_contrib : 0
.tg_load_avg : 437
cfs_rq[76]:/zy_test_l1_32
.tg_load_avg_contrib : 0
.tg_load_avg : 203
cfs_rq[77]:/zy_test_l1_32/zy_test_l2_1024
.tg_load_avg_contrib : 221
.tg_load_avg : 437
cfs_rq[77]:/zy_test_l1_32
.tg_load_avg_contrib : 71
.tg_load_avg : 203
cfs_rq[78]:/zy_test_l1_32/zy_test_l2_1024
.tg_load_avg_contrib : 0
.tg_load_avg

本文围绕 Linux CFS 任务组(task group)负载更新问题展开:先介绍复现问题的脚本及测试输出,指出触发 bug 后任务组负载更新异常;接着阐述 patch-2 对该问题的修复描述,分析负载未被更新的原因;最后说明修复逻辑,包括对 child cfs_rq 和 parent cfs_rq 的更新,以及社区解决负载同步问题的方法。
最低0.47元/天 解锁文章
1005

被折叠的 条评论
为什么被折叠?



