一 mem leak简单判断
# Write "venc=0" to the vs_log level node before the tests (presumably lowers venc log verbosity — confirm)
echo "venc=0" > /proc/vs_log/level
1.1 ko insmod/rmmod测试
# Load and unload vs_venc.ko 11 times (cnt 0..10) without running any encoding.
# System and MMZ memory statistics are dumped before insmod, after insmod and
# after rmmod so that successive iterations can be compared.
printf "venc ko insmod test without video encoding\n"
cnt=0
while [ "$cnt" -le 10 ]; do
    printf "before insmod cnt %d\n" "$cnt"
    # Drop the kernel caches first so the "before" numbers are comparable.
    echo 3 > /proc/sys/vm/drop_caches
    cat /proc/meminfo /proc/mmz/mmz-info

    insmod /lib/modules/vs_venc.ko
    printf "after insmod cnt %d\n" "$cnt"
    cat /proc/meminfo /proc/mmz/mmz-info

    rmmod vs_venc.ko
    printf "after rmmod cnt %d\n" "$cnt"
    cat /proc/meminfo /proc/mmz/mmz-info

    cnt=$((cnt + 1))
done
1.2 ko insmod/rmmod+unittest run测试
# Same load/unload cycle, repeated 100 times (cnt 0..99), but with the two
# multi-channel encode unit tests (H.265, then H.264) run while the module is loaded.
# Memory statistics are dumped once per iteration, before insmod.
printf "venc ko insmod test with video encoding\n"
cnt=0
while [ "$cnt" -le 99 ]; do
    printf "before insmod cnt %d\n" "$cnt"
    echo 3 > /proc/sys/vm/drop_caches
    cat /proc/meminfo /proc/mmz/mmz-info
    ## available physical memory = MemFree + Buffers + Cached
    insmod /lib/modules/vs_venc.ko
    for codec in h265 h264; do
        ./venc_unit_test --venc-cfgpath=./config/venc_unittest_path.json --gtest_filter=venc_unitest.testcase_multi_chn_${codec}
    done
    rmmod vs_venc.ko
    cnt=$((cnt + 1))
done
二 用户态mem leak问题查找
默认都是调用malloc/free等glibc的标准库函数,用valgrind工具来debug
下载地址:https://sourceware.org/pub/valgrind/valgrind-3.20.0.tar.bz2
2.1 交叉编译
# Target architecture and toolchain prefix for the aarch64 cross build
export ARCH=arm64
export CROSS_COMPILE=aarch64-linux-gnu-
# Put the Linaro 7.5.0 aarch64 cross toolchain on PATH
export PATH=$PATH:/opt/vs-linux/x86-arm/gcc-linaro-7.5.0-aarch64-linux-gnu/bin/
# Configure for an aarch64 host and install into the given prefix.
# NOTE(review): the prefix directory is named valgrind-3.18.1 while the tarball above is 3.20.0 — confirm the path
./configure --prefix=/home/jyshan/work/coding/vssdk/src/vs-mp/src/valgrind-3.18.1/install --host=aarch64-linux-gnu
# Rebuild from scratch, then install into the prefix
make clean
make
make install
将install目录下的内容拷贝到pc端的nfs目录下。
2.2 fpga/单板上环境准备
##单板上
mount -t nfs -o vers=2,nolock <pc_ip>:<pc端nfs导出目录> <板端挂载点>   ##挂载pc的nfs目录到板端(原文此处只写了/mnt/nfs/,需按实际环境补全服务器地址和挂载点)
export PATH=$PATH:/mnt/test/mem/valgrind-3.20.0/install/bin/
执行valgrind -h查看是否可以正常运行。
如果有以下错误:
valgrind: failed to start tool 'memcheck' for platform 'arm64-linux': No such file or directory
这是因为找不到memcheck-arm64-linux,可通过find命令查找memcheck-arm64-linux所在的位置,
然后执行命令export VALGRIND_LIB=/mnt/test/mem/valgrind-3.20.0/install/libexec/valgrind 即可。
大家也可以用我这边编译好的bin文件直接执行。
memleakdebugtools-Linux文档类资源-CSDN下载
2.3 fpga/单板上run
# Run the crop_h264 and multi_chn_h264 unit-test cases under valgrind memcheck,
# with full leak checking, all leak kinds reported and child processes traced.
valgrind --tool=memcheck --leak-check=full --show-leak-kinds=all --show-reachable=yes --trace-children=yes ./venc_unit_test --venc-cfgpath=./config/venc_unittest_path.json --gtest_filter=venc_unitest.testcase_crop_h264
valgrind --tool=memcheck --leak-check=full --show-leak-kinds=all --show-reachable=yes --trace-children=yes ./venc_unit_test --venc-cfgpath=./config/venc_unittest_path.json --gtest_filter=venc_unitest.testcase_multi_chn_h264
其中--leak-check=full指的是完全检查内存泄漏(逐条列出每处泄漏的详细信息),--show-reachable=yes是把程序退出时仍可访问(still reachable)但未释放的内存块也显示出来,--trace-children=yes是跟入子进程。
2.4 结果分析
==142== HEAP SUMMARY:
==142== in use at exit: 2,132 bytes in 32 blocks
==142== total heap usage: 3,773 allocs, 3,741 frees, 400,376 bytes allocated
==142==
==142== Thread 1:
==142== 68 bytes in 1 blocks are still reachable in loss record 1 of 4
==142== at 0x48450F0: malloc (vg_replace_malloc.c:381)
==142== by 0x4879287: vs_mal_venc_init (vs_mal_venc.c:113)
==142== by 0x487937F: vs_mal_venc_version_get (vs_mal_venc.c:141)
==142== by 0x44D52B: handle_before_unittest() (venc_testcase_base.cpp:818)
==142== by 0x424A87: venc_unittest_run (venc_unittest_comm.cpp:1249)
==142== by 0x44E0CB: venc_testcase_base (venc_testcase_base.cpp:1084)
==142== by 0x42B267: venc_testcase_crop_h264 (venc_unittest.cpp:359)
==142== by 0x427237: venc_unitest_testcase_crop_h264_Test::TestBody() (venc_unittest.cpp:54)
==142== by 0x48C16F: void testing::internal::HandleSehExceptionsInMethodIfSupported<testing::Test, void>(testing::Test*, void (testing::Test::*)(), char const*) (in /usr/bin/venc/venc_unit_test)
==142== by 0x485697: void testing::internal::HandleExceptionsInMethodIfSupported<testing::Test, void>(testing::Test*, void (testing::Test::*)(), char const*) (in /usr/bin/venc/venc_unit_test)
==142== by 0x4612B3: testing::Test::Run() (in /usr/bin/venc/venc_unit_test)
==142== by 0x461B67: testing::TestInfo::Run() (in /usr/bin/venc/venc_unit_test)
==142==
==142== 144 bytes in 1 blocks are definitely lost in loss record 2 of 4
==142== at 0x484A0D0: calloc (vg_replace_malloc.c:1328)
==142== by 0x42352B: stream_get_task(void*) (venc_unittest_comm.cpp:876)
==142== by 0x4899EF7: start_thread (pthread_create.c:458)
==142== by 0x4BF33FB: thread_start (clone.S:84)
==142==
==142== 1,856 bytes in 29 blocks are indirectly lost in loss record 3 of 4
==142== at 0x48450F0: malloc (vg_replace_malloc.c:381)
==142== by 0x4229F3: frame_buffer_malloc(vs_venc_unittest_channel_s*, vs_bool_t) (venc_unittest_comm.cpp:539)
==142== by 0x422E9B: frame_send_task(void*) (venc_unittest_comm.cpp:673)
==142== by 0x4899EF7: start_thread (pthread_create.c:458)
==142== by 0x4BF33FB: thread_start (clone.S:84)
==142==
==142== 1,920 (64 direct, 1,856 indirect) bytes in 1 blocks are definitely lost in loss record 4 of 4
==142== at 0x48450F0: malloc (vg_replace_malloc.c:381)
==142== by 0x4229F3: frame_buffer_malloc(vs_venc_unittest_channel_s*, vs_bool_t) (venc_unittest_comm.cpp:539)
==142== by 0x422E9B: frame_send_task(void*) (venc_unittest_comm.cpp:673)
==142== by 0x4899EF7: start_thread (pthread_create.c:458)
==142== by 0x4BF33FB: thread_start (clone.S:84)
==142==
==142== LEAK SUMMARY:
==142== definitely lost: 208 bytes in 2 blocks
==142== indirectly lost: 1,856 bytes in 29 blocks
==142== possibly lost: 0 bytes in 0 blocks
==142== still reachable: 68 bytes in 1 blocks
==142== suppressed: 0 bytes in 0 blocks
==142==
==142== Use --track-origins=yes to see where uninitialised values come from
==142== For lists of detected and suppressed errors, rerun with: -s
==142== ERROR SUMMARY: 431 errors from 27 contexts (suppressed: 0 from 0)
三 内核态mem leak问题查找
3.1 调osal mem api部分
3.1.1 osal加入监测点
修改vs_os_mem.c中的实现,加入每次分配/释放的记录(记录分配的地址和大小,释放时清除对应记录),代码如下:
#ifdef VS_VENC_DEBUG_MEMORY_LEAK
#include <linux/kprobes.h>
#include <asm/traps.h>
/* Capacity of the tracking table: at most this many allocations are recorded. */
#define MEM_MALLOC_CNT_MAX 10240
/* Address of each recorded allocation; a slot is reset to 0 when that address is freed. */
static __u64 memory_leak_addr[MEM_MALLOC_CNT_MAX];
/* Requested size of each recorded allocation, stored at the same index as its address. */
static unsigned int memory_leak_size[MEM_MALLOC_CNT_MAX];
/* Number of table slots used so far; reset on start, incremented per recorded allocation. */
static unsigned int memory_leak_cnt = 0;
/* 1 while tracking is active (set by the start function, cleared by the stop function). */
static unsigned int memory_leak_check_enable = 0;
/* Mutex taken around table accesses; allocated on start and freed on stop. */
static struct vs_os_mutex_s *p_memory_leak_lock = NULL;
void vs_os_memory_leak_check_start(void)
{
p_memory_leak_lock = (struct vs_os_mutex_s *)kmalloc(sizeof(struct vs_os_mutex_s), OS_GFP_KERNEL);
vs_os_mutex_init(p_memory_leak_lock);
memory_leak_cnt = 0;
memset(memory_leak_addr,0,sizeof(__u64)*MEM_MALLOC_CNT_MAX);
memset(memory_leak_size,0,sizeof(unsigned int)*MEM_MALLOC_CNT_MAX);
memory_leak_check_enable = 1;
printk("vs_os_memory_leak_check_start\n");
dump_stack();
}
/**
 * vs_os_memory_leak_check_stop - stop tracking and report what is still recorded.
 *
 * Disables tracking, prints every table slot whose address was never cleared
 * by a matching free, then destroys and releases the tracking mutex if one
 * exists.
 */
void vs_os_memory_leak_check_stop(void)
{
    unsigned int slot;

    memory_leak_check_enable = 0;

    for (slot = 0; slot < memory_leak_cnt; slot++) {
        /* A zero address means the allocation was freed: not a leak. */
        if (memory_leak_addr[slot] == 0)
            continue;
        printk("memory_leak index %d addr 0x%llx size 0x%08x\n",
               slot, memory_leak_addr[slot], memory_leak_size[slot]);
    }

    if (p_memory_leak_lock != NULL) {
        vs_os_mutex_destroy(p_memory_leak_lock);
        kfree(p_memory_leak_lock);
        p_memory_leak_lock = NULL;
    }

    printk("vs_os_memory_leak_check_stop\n");
}
#endif
/**
 * vs_os_kmalloc - OSAL wrapper around kmalloc().
 * @size:     number of bytes to allocate.
 * @gfp_flag: OS_GFP_KERNEL or OS_GFP_ATOMIC; mapped to GFP_KERNEL / GFP_ATOMIC.
 *
 * Return: the kmalloc() result, or NULL if @gfp_flag is neither supported value.
 *
 * When VS_VENC_DEBUG_MEMORY_LEAK is defined and tracking is enabled, every
 * successful allocation is appended to the leak-tracking table.
 */
void *vs_os_kmalloc(unsigned long size, unsigned int gfp_flag)
{
    void *ptr;

    if (gfp_flag == OS_GFP_KERNEL) {
        ptr = kmalloc(size, GFP_KERNEL);
    } else if (gfp_flag == OS_GFP_ATOMIC) {
        ptr = kmalloc(size, GFP_ATOMIC);
    } else {
        return NULL;
    }
#ifdef VS_VENC_DEBUG_MEMORY_LEAK
    /*
     * A failed allocation cannot leak, so it is not recorded: storing a 0
     * address would only consume a table slot.
     * NOTE(review): OS_GFP_ATOMIC callers may be in atomic context; confirm
     * that vs_os_mutex_lock() is safe to call there.
     */
    if (ptr != NULL && memory_leak_check_enable == 1) {
        vs_os_mutex_lock(p_memory_leak_lock);
        //printk("malloc pointer [0x%px] size 0x%08x cnt %d\n", ptr,size,memory_leak_cnt);
        if (memory_leak_cnt < MEM_MALLOC_CNT_MAX) {
            memory_leak_addr[memory_leak_cnt] = (__u64)ptr;
            memory_leak_size[memory_leak_cnt] = (unsigned int)size;
            /* Enable the lines below to print the call stack of the first allocations. */
            //if(memory_leak_cnt < 40) {
            //    printk("vs_os_kmalloc leak index %d addr 0x%llx size 0x%08x\n",memory_leak_cnt,memory_leak_addr[memory_leak_cnt],memory_leak_size[memory_leak_cnt]);
            //    dump_stack();
            //}
            memory_leak_cnt++;
        } else {
            /* Table is full: this allocation is not tracked. */
            printk("should bigger MEM_MALLOC_CNT_MAX\n");
        }
        vs_os_mutex_unlock(p_memory_leak_lock);
    }
#endif
    return ptr;
}
/**
 * vs_os_kfree - OSAL wrapper around kfree().
 * @addr: pointer to release; may be NULL.
 *
 * When VS_VENC_DEBUG_MEMORY_LEAK is defined and tracking is enabled, the
 * matching entry in the leak-tracking table is cleared, and a message is
 * printed if the pointer was never recorded.
 */
void vs_os_kfree(const void *addr)
{
#ifdef VS_VENC_DEBUG_MEMORY_LEAK
    /*
     * Freeing NULL is a valid no-op. Looking it up would either match a slot
     * that was already cleared to 0 or print a misleading "not find" message,
     * so NULL is not looked up at all.
     */
    if (addr != NULL && memory_leak_check_enable == 1) {
        unsigned int index = 0;
        __u64 tmp_addr = (__u64)addr;

        vs_os_mutex_lock(p_memory_leak_lock);
        for (index = 0; index < memory_leak_cnt; index++) {
            if (tmp_addr == memory_leak_addr[index]) {
                /* Mark the slot as freed so the stop function does not report it. */
                memory_leak_addr[index] = 0;
                break;
            }
        }
        if (index >= memory_leak_cnt)
            printk("free pointer [0x%px] and not find\n", addr);
        //else
        //    printk("free pointer [0x%px] index %d\n", addr,index);
        vs_os_mutex_unlock(p_memory_leak_lock);
    }
#endif
    kfree(addr);
}
/**
 * vs_os_vmalloc - OSAL wrapper around vmalloc().
 * @size: number of bytes to allocate.
 *
 * Return: the vmalloc() result.
 *
 * When VS_VENC_DEBUG_MEMORY_LEAK is defined and tracking is enabled, every
 * successful allocation is appended to the leak-tracking table.
 */
void *vs_os_vmalloc(unsigned long size)
{
    void *ptr = vmalloc(size);
#ifdef VS_VENC_DEBUG_MEMORY_LEAK
    /*
     * A failed allocation cannot leak, so it is not recorded: storing a 0
     * address would only consume a table slot.
     */
    if (ptr != NULL && memory_leak_check_enable == 1) {
        vs_os_mutex_lock(p_memory_leak_lock);
        //printk("malloc pointer [0x%px] size 0x%08x cnt %d\n", ptr,size,memory_leak_cnt);
        if (memory_leak_cnt < MEM_MALLOC_CNT_MAX) {
            memory_leak_addr[memory_leak_cnt] = (__u64)ptr;
            memory_leak_size[memory_leak_cnt] = (unsigned int)size;
            /* Enable the lines below to print the call stack of the first allocations. */
            //if(memory_leak_cnt < 40) {
            //    printk("vs_os_vmalloc leak index %d addr 0x%llx size 0x%08x\n",memory_leak_cnt,memory_leak_addr[memory_leak_cnt],memory_leak_size[memory_leak_cnt]);
            //    dump_stack();
            //}
            memory_leak_cnt++;
        } else {
            /* Table is full: this allocation is not tracked. */
            printk("should bigger MEM_MALLOC_CNT_MAX\n");
        }
        vs_os_mutex_unlock(p_memory_leak_lock);
    }
#endif
    return ptr;
}
/**
 * vs_os_vfree - OSAL wrapper around vfree().
 * @addr: pointer to release; may be NULL.
 *
 * When VS_VENC_DEBUG_MEMORY_LEAK is defined and tracking is enabled, the
 * matching entry in the leak-tracking table is cleared, and a message is
 * printed if the pointer was never recorded.
 */
void vs_os_vfree(const void *addr)
{
#ifdef VS_VENC_DEBUG_MEMORY_LEAK
    /*
     * Freeing NULL is a valid no-op. Looking it up would either match a slot
     * that was already cleared to 0 or print a misleading "not find" message,
     * so NULL is not looked up at all.
     */
    if (addr != NULL && memory_leak_check_enable == 1) {
        unsigned int index = 0;
        __u64 tmp_addr = (__u64)addr;

        vs_os_mutex_lock(p_memory_leak_lock);
        for (index = 0; index < memory_leak_cnt; index++) {
            if (tmp_addr == memory_leak_addr[index]) {
                /* Mark the slot as freed so the stop function does not report it. */
                memory_leak_addr[index] = 0;
                break;
            }
        }
        if (index >= memory_leak_cnt)
            printk("free pointer [0x%px] and not find\n", addr);
        //else
        //    printk("free pointer [0x%px] index %d\n", addr,index);
        vs_os_mutex_unlock(p_memory_leak_lock);
    }
#endif
    vfree(addr);
}
3.1.2 start/stop mem leak
Vs_venc_init.c中
static int vs_venc_probe(struct platform_device *pdev)
{
int ret,i;
struct vs_venc_priv *p_venc_priv;
struct proc_dir_entry *p_proc;
#ifdef VS_VENC_DEBUG_MEMORY_LEAK
vs_os_memory_leak_check_start();
#endif
略
/**
 * vs_venc_remove - platform driver remove callback for the venc device.
 * @pdev: platform device being removed.
 *
 * Deinitialises the venc module, deregisters the module and misc device,
 * removes the proc entries and, when VS_VENC_DEBUG_MEMORY_LEAK is defined,
 * stops leak tracking last so that the report covers everything the driver
 * should have freed by then.
 *
 * Return: 0 if vs_venc_mod_deinit() succeeded, -EINVAL otherwise. The rest of
 * the teardown runs in both cases.
 */
static int vs_venc_remove(struct platform_device *pdev)
{
    vs_int32_t ret;
    struct vs_venc_priv *p_venc_priv = platform_get_drvdata(pdev);

    ret = vs_venc_mod_deinit(pdev);
    if (ret) {
        /* Report the call that actually failed, together with its return code. */
        pr_err("failed to vs_venc_mod_deinit ret %d\n", ret);
        ret = -EINVAL;
    }
    venc_module_deregister();
    misc_deregister(&vs_venc_dev);
    venc_proc_destroy(p_venc_priv->proc_dir);
    remove_proc_entry(VS_VENC_PROC_DIR, NULL);
#ifdef VS_VENC_DEBUG_MEMORY_LEAK
    vs_os_memory_leak_check_stop();
#endif
    printk("vs_venc_remove successful!\n");
    return ret;
}
3.1.3 结果分析
[ 2023.816434] memory_leak index 4 addr 0xffff000026332800 size 0x00000108
[ 1979.736104] vs_os_kmalloc leak index 4 addr 0xffff000026332800 size 0x00000108
[ 1979.744005] [vs_drv_venc_core.c:915][venc_prepare_process] venc_prepare_process Enter
[ 1979.752465] CPU: 0 PID: 299 Comm: insmod Tainted: G O 5.4.94 #42
[ 1979.759928] Hardware name: Visinextek Technologies, Inc. vs819-emulation (DT)
[ 1979.767189] Call trace:
[ 1979.769936] dump_backtrace+0x0/0x150
[ 1979.773786] show_stack+0x14/0x20
[ 1979.777297] dump_stack+0xbc/0x118
[ 1979.781422] vs_os_kmalloc+0xfc/0x128 [vs_osal]
[ 1979.787978] malloc+0x10/0x18 [vs_venc]
[ 1979.793683] EWLmalloc+0x18/0x50 [vs_venc]
[ 1979.799608] EWLInit+0x54/0x5d0 [vs_venc]
[ 1979.805446] vs_venc_core_init+0xc4/0x1b0 [vs_venc]
[ 1979.812135] vs_venc_mod_init+0x158/0x2f0 [vs_venc]
[ 1979.818829] vs_venc_probe+0x98/0x1b0 [vs_venc]
[ 1979.823608] platform_drv_probe+0x50/0xa0
[ 1979.827840] really_probe+0x108/0x360
[ 1979.831729] driver_probe_device+0x58/0x100
[ 1979.836126] device_driver_attach+0x6c/0x90
[ 1979.840509] __driver_attach+0x84/0xc8
[ 1979.844454] bus_for_each_dev+0x74/0xc8
[ 1979.848483] driver_attach+0x20/0x28
[ 1979.852252] bus_add_driver+0x148/0x1f0
[ 1979.856288] driver_register+0x60/0x110
[ 1979.860298] __platform_driver_register+0x40/0x48
[ 1979.866809] vs_venc_driver_init+0x20/0x28 [vs_venc]
[ 1979.871993] do_one_initcall+0x5c/0x1c8
[ 1979.876063] do_init_module+0x54/0x1f0
[ 1979.880040] load_module+0x1c88/0x2208
[ 1979.884014] __do_sys_init_module+0x178/0x210
[ 1979.888584] __arm64_sys_init_module+0x18/0x20
[ 1979.893257] el0_svc_common.constprop.2+0x64/0x168
[ 1979.898250] el0_svc_handler+0x20/0x80
[ 1979.902170] el0_svc+0x8/0x204
Ko加载时的那次EWLInit调用没有对应的EWLRelease,要加上
vs_venc_core_deinit中判断条件写错了
if (NULL == p_task->p_ewl_inst) { /// BUG shown here: the test is inverted, it should be "!=" (not equal)
EWLRelease(p_task->p_ewl_inst);
p_task->p_ewl_inst = NULL;
}
[ 2023.824270] memory_leak index 5 addr 0xffff000026bc6400 size 0x000000f8
[ 2023.902878] memory_leak index 35 addr 0xffff000026bc6600 size 0x000000f8
[ 2023.909986] memory_leak index 36 addr 0xffff000026bc6100 size 0x000000f8
以上同一处地方
[ 1979.910505] vs_os_kmalloc leak index 5 addr 0xffff000026bc6400 size 0x000000f8
[ 1979.918894] CPU: 0 PID: 299 Comm: insmod Tainted: G O 5.4.94 #42
[ 1979.926371] Hardware name: Visinextek Technologies, Inc. vs819-emulation (DT)
[ 1979.933639] Call trace:
[ 1979.936350] dump_backtrace+0x0/0x150
[ 1979.940197] show_stack+0x14/0x20
[ 1979.943698] dump_stack+0xbc/0x118
[ 1979.947812] vs_os_kmalloc+0xfc/0x128 [vs_osal]
[ 1979.954279] vs_venc_vendor_open+0x18/0x58 [vs_venc]
[ 1979.961100] EWLInit+0x94/0x5d0 [vs_venc]
[ 1979.966942] vs_venc_core_init+0xc4/0x1b0 [vs_venc]
[ 1979.973647] vs_venc_mod_init+0x158/0x2f0 [vs_venc]
[ 1979.980357] vs_venc_probe+0x98/0x1b0 [vs_venc]
[ 1979.985113] platform_drv_probe+0x50/0xa0
[ 1979.989353] really_probe+0x108/0x360
[ 1979.993225] driver_probe_device+0x58/0x100
[ 1979.997619] device_driver_attach+0x6c/0x90
[ 1980.002001] __driver_attach+0x84/0xc8
[ 1980.005943] bus_for_each_dev+0x74/0xc8
[ 1980.009972] driver_attach+0x20/0x28
[ 1980.013742] bus_add_driver+0x148/0x1f0
[ 1980.017780] driver_register+0x60/0x110
[ 1980.021788] __platform_driver_register+0x40/0x48
[ 1980.028304] vs_venc_driver_init+0x20/0x28 [vs_venc]
[ 1980.033494] do_one_initcall+0x5c/0x1c8
[ 1980.037560] do_init_module+0x54/0x1f0
[ 1980.041538] load_module+0x1c88/0x2208
[ 1980.045513] __do_sys_init_module+0x178/0x210
[ 1980.050080] __arm64_sys_init_module+0x18/0x20
[ 1980.054755] el0_svc_common.constprop.2+0x64/0x168
[ 1980.059756] el0_svc_handler+0x20/0x80
[ 1980.063672] el0_svc+0x8/0x204
EWLRelease中调用vs_venc_vendor_close释放资源
[ 2023.831824] memory_leak index 9 addr 0xffff000026332a00 size 0x00000200
[ 2023.838954] memory_leak index 10 addr 0xffff000026332c00 size 0x00000200
[ 2023.846102] memory_leak index 11 addr 0xffff000026332e00 size 0x00000200
[ 2023.853245] memory_leak index 12 addr 0xffff000026720200 size 0x00000200
[ 2023.860280] memory_leak index 13 addr 0xffff000026720000 size 0x00000200
[ 2023.867395] memory_leak index 14 addr 0xffff000081e52000 size 0x00000200
[ 2023.874510] memory_leak index 15 addr 0xffff000081e52200 size 0x00000200
[ 2023.881618] memory_leak index 16 addr 0xffff000081e52400 size 0x00000200
[ 2023.888640] memory_leak index 17 addr 0xffff000081e52600 size 0x00000200
[ 2023.895752] memory_leak index 18 addr 0xffff000081e52800 size 0x00000200
以上同一处地方
[ 1988.676381] vs_os_kmalloc leak index 18 addr 0xffff000081e52800 size 0x00000200
[ 1988.686326] CPU: 0 PID: 307 Comm: venc_unit_test Tainted: G O 5.4.94 #42
[ 1988.694521] Hardware name: Visinextek Technologies, Inc. vs819-emulation (DT)
[ 1988.701790] Call trace:
[ 1988.704515] dump_backtrace+0x0/0x150
[ 1988.708363] show_stack+0x14/0x20
[ 1988.711872] dump_stack+0xbc/0x118
[ 1988.715987] vs_os_kmalloc+0xfc/0x128 [vs_osal]
[ 1988.722385] vs_venc_nalheader_init+0x110/0x208 [vs_venc]
[ 1988.729399] vs_venc_chn_attr_init+0x168/0x320 [vs_venc]
[ 1988.736300] vs_venc_chn_create+0x78/0x158 [vs_venc]
[ 1988.742849] vs_drv_venc_chn_create+0x28/0xe8 [vs_venc]
[ 1988.749640] vs_venc_ioctl+0x29e4/0x44a8 [vs_venc]
[ 1988.754666] do_vfs_ioctl+0xb8/0xb88
[ 1988.758427] ksys_ioctl+0x44/0x90
[ 1988.761931] __arm64_sys_ioctl+0x1c/0xa8
[ 1988.766090] el0_svc_common.constprop.2+0x64/0x168
[ 1988.771094] el0_svc_handler+0x20/0x80
[ 1988.775021] el0_svc+0x8/0x204
vs_venc_nalheader_deinit 没有被调用需要加上
3.2 非调用osal mem api部分
搜索代码中(部分vendor的代码,比较少)直接调用kmalloc、vmalloc、kfree,手动检查
3.3 使用kernel自带的CONFIG_DEBUG_KMEMLEAK
参见 https://blog.csdn.net/AXW2013/article/details/79603205