一、问题描述
为了分析GPU内核驱动是否有内存泄露,运行glmark2、mpv、x11perf等应用,通过cat /proc/slabinfo、slabtop、cat /proc/meminfo等命令观察slab等内存是否持续增长。发现kmalloc-256持续增长。
二、问题分析
2.1 kmemleak分析内核内存泄露
具体步骤如下:
1、将内核配置CONFIG_DEBUG_KERNEL和CONFIG_DEBUG_KMEMLEAK打开(kmemleak功能由CONFIG_DEBUG_KMEMLEAK提供,它依赖CONFIG_DEBUG_KERNEL),重新编译内核后安装。
注:kmemleak具体使用请参看文章《内核检查内存泄漏的工具 --- kmemleak》
2、安装好内核后重启主机,运行glmark2。
3、运行命令echo scan > /sys/kernel/debug/kmemleak触发一次扫描。
4、过一段时间后,运行命令cat /sys/kernel/debug/kmemleak > result生成扫描结果。
查看result文件,有大量如下信息:
unreferenced object 0xffff9e9bbcc93500 (size 256):
comm "glmark2", pid 2820, jiffies 4310974849 (age 942.056s)
hex dump (first 32 bytes):
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
backtrace:
[<0000000027416409>] kmem_cache_alloc_trace+0x155/0x480
[<000000002cec6d5e>] drm_pvr_srvkm_init+0x79/0x250 [xdxgpu]
[<0000000019ca5bcd>] drm_ioctl_kernel+0xac/0xf0 [drm]
[<0000000028c47c7f>] drm_ioctl+0x271/0x490 [drm]
[<00000000e2ea346f>] xdx_fops_ioctl+0x7b/0xb0 [xdxgpu]
[<00000000d2e5d9d2>] __x64_sys_ioctl+0x91/0xc0
[<00000000234fe19f>] do_syscall_64+0x59/0xc0
[<000000006aa64ef0>] entry_SYSCALL_64_after_hwframe+0x44/0xae
unreferenced object 0xffff9e9b5f62ba20 (size 32):
comm "glmark2", pid 2820, jiffies 4310974849 (age 942.056s)
hex dump (first 32 bytes):
04 0b 00 00 00 00 00 00 04 0b 00 00 00 00 00 00 ................
00 60 65 c7 9a 9e ff ff 80 b8 62 5f 9b 9e ff ff .`e.......b_....
backtrace:
[<000000004c92ba8e>] __kmalloc+0x177/0x4d0
[<0000000095a744ac>] OSAllocZMem+0x2d/0x130 [xdxgpu]
[<000000000904d2d1>] OSConnectionPrivateDataInit+0x1c/0x80 [xdxgpu]
[<000000008e42e1c5>] drm_pvr_srvkm_init+0x99/0x250 [xdxgpu]
[<0000000019ca5bcd>] drm_ioctl_kernel+0xac/0xf0 [drm]
[<0000000028c47c7f>] drm_ioctl+0x271/0x490 [drm]
[<00000000e2ea346f>] xdx_fops_ioctl+0x7b/0xb0 [xdxgpu]
[<00000000d2e5d9d2>] __x64_sys_ioctl+0x91/0xc0
[<00000000234fe19f>] do_syscall_64+0x59/0xc0
[<000000006aa64ef0>] entry_SYSCALL_64_after_hwframe+0x44/0xae
2.2 代码分析
对于第一个调用堆栈,找到相应的内核驱动KMD函数,进而找到发起ioctl的用户态驱动UMD函数。先看了KMD相关函数,发现可能会存在很多问题,例如connection共享引起的内存泄露等问题,所以必须结合UMD的代码分析调用逻辑。
debug UMD相关代码
为了理清上层的调用逻辑,对glmark2的如下函数打了断点进行跟踪:
PVRFDSyncOpen
OpenServicesDevice
OpenCloExec
得到如下结果:
1、首先函数PVRDRICreateScreen->OpenCloExec打开设备文件/dev/dri/renderD128得到fd值为5.
(gdb) bt
#0 OpenCloExec (pszPath=0x7fffffff8df0 "/dev/dri/renderD128") at common/linux_drm_utils.c:89
#1 0x00007ffff73a4789 in OpenMinor (iMinor=128) at common/linux_drm_utils.c:150
#2 0x00007ffff73a47b8 in OpenPVRDevMinor (iMinor=128) at common/linux_drm_utils.c:155
#3 0x00007ffff73a4d4a in PVRDRMOpenNearestRender (iMatchFd=4) at common/linux_drm_utils.c:408
#4 0x00007ffff73328a1 in PVRDRICreateScreen (iFd=4, iDisplayFd=4) at lws/pvr_dri_support/pvrscreen_impl.c:249
#5 0x00007ffff73348a8 in DRIMODCreateScreen (psDRIScreen=0x5555556ceac0, iFD=4, bUseInvalidate=true, pvLoaderPrivate=0x5555556b00b0,
pppsConfigs=0x7fffffffd080, piMaxGLES1Version=0x7fffffffd078, piMaxGLES2Version=0x7fffffffd07c)
at lws/pvr_dri_support/pvrdri_mod.c:199
#6 0x00007ffff73edb2b in DRISUPCreateScreen (psDRIScreen=0x5555556ceac0, iFD=4, bUseInvalidate=true, pvLoaderPrivate=0x5555556b00b0,
pppsConfigs=0x7fffffffd080, piMaxGLES1Version=0x7fffffffd078, piMaxGLES2Version=0x7fffffffd07c)
at ../../../../../../../../../drivers/imported/mesa/src/gallium/frontends/pvr/pvrcompat.c:294
#7 0x00007ffff73eabbb in PVRDRIInitScreen (psDRIScreen=0x5555556ceac0)
at ../../../../../../../../../drivers/imported/mesa/src/gallium/frontends/pvr/pvrdri.c:216
#8 0x00007ffff73dd03e in driCreateNewScreen2 (scrn=0, fd=4, extensions=0x7ffff7801aa0 <loader_extensions>,
driver_extensions=0x7ffff7425c40 <pvr_driver_extensions>, driver_configs=0x7fffffffdb60, data=0x5555556b00b0)
at ../../../../../../../../../drivers/imported/mesa/src/gallium/frontends/dri/dri_util.c:159
#9 0x00007ffff77d5bcb in dri3_create_screen (screen=0, priv=0x5555556aca80)
at ../../../../../../../../../drivers/imported/mesa/src/glx/dri3_glx.c:939
#10 0x00007ffff77be5c1 in AllocAndFetchScreenConfigs (dpy=0x555555699350, priv=0x5555556aca80)
at ../../../../../../../../../drivers/imported/mesa/src/glx/glxext.c:824
#11 0x00007ffff77be9c1 in __glXInitialize (dpy=0x555555699350)
at ../../../../../../../../../drivers/imported/mesa/src/glx/glxext.c:953
#12 0x00007ffff77b7d47 in glXQueryVersion (dpy=0x555555699350, major=0x7fffffffdca0, minor=0x7fffffffdca4)
at ../../../../../../../../../drivers/imported/mesa/src/glx/glxcmds.c:537
#13 0x0000555555578bee in glad_glx_find_core_glx ()
#14 0x0000555555578d41 in gladLoadGLXUserPtr ()
#15 0x000055555557379d in GLStateGLX::init_display (native_display=<optimized out>, visual_config=..., this=0x7fffffffddf0)
at ../src/gl-state-glx.cpp:44
#16 GLStateGLX::init_display (this=this@entry=0x7fffffffddf0, native_display=<optimized out>, visual_config=...)
at ../src/gl-state-glx.cpp:34
#17 0x000055555557151b in CanvasGeneric::init (this=0x7fffffffde60) at ../src/canvas-generic.cpp:43
#18 0x000055555556e558 in main (argc=<optimized out>, argv=<optimized out>) at ../src/main.cpp:210
(gdb) p iFd
$1 = 5
2、函数ConnectionCreate->OpenServicesDevice->LinuxOpenServicesDevice->_GetFd->dup最终对fd 5调用dup,内核将fd为5的file的引用计数增1,并生成一个fd返回给用户态.
(gdb) bt
#0 OpenServicesDevice (hOSConnection=6, iDeviceID=1555705856, ui32SrvFlags=32767, phServices=0x7fffffffce95,
pui32CapabilityFlags=0x555555726330, psDeviceID=0x7ffff6f9de9e <PVRSRVGetAppHint+207>, pui64PackedBvnc=0x7fffffffce60)
at services/client/env/linux/pvr_bridge_u.c:591
#1 0x00007ffff6f570a8 in ConnectionCreate (ppsConnectionOut=0x5555556d2eb8, hOSConnection=5, iDeviceID=-1, ui32SrvFlags=32)
at services/client/common/connection.c:234
#2 0x00007ffff6f65222 in _ConnectionCreateDevice (ppsConnection=0x5555556d2eb8, hOSConnection=5, iDeviceID=-1, ui32SrvFlags=0)
at services/client/common/srvcore.c:297
#3 0x00007ffff6f653e7 in PVRSRVConnectionCreate (ppsConnection=0x5555556d2eb8, hOSConnection=5, ui32SrvFlags=0)
at services/client/common/srvcore.c:346
#4 0x00007ffff7332a06 in PVRDRICreateScreen (iFd=4, iDisplayFd=4) at lws/pvr_dri_support/pvrscreen_impl.c:299
#5 0x00007ffff73348a8 in DRIMODCreateScreen (psDRIScreen=0x5555556ceac0, iFD=4, bUseInvalidate=true, pvLoaderPrivate=0x5555556b00b0,
pppsConfigs=0x7fffffffd080, piMaxGLES1Version=0x7fffffffd078, piMaxGLES2Version=0x7fffffffd07c)
at lws/pvr_dri_support/pvrdri_mod.c:199
#6 0x00007ffff73edb2b in DRISUPCreateScreen (psDRIScreen=0x5555556ceac0, iFD=4, bUseInvalidate=true, pvLoaderPrivate=0x5555556b00b0,
pppsConfigs=0x7fffffffd080, piMaxGLES1Version=0x7fffffffd078, piMaxGLES2Version=0x7fffffffd07c)
at ../../../../../../../../../drivers/imported/mesa/src/gallium/frontends/pvr/pvrcompat.c:294
#7 0x00007ffff73eabbb in PVRDRIInitScreen (psDRIScreen=0x5555556ceac0)
at ../../../../../../../../../drivers/imported/mesa/src/gallium/frontends/pvr/pvrdri.c:216
#8 0x00007ffff73dd03e in driCreateNewScreen2 (scrn=0, fd=4, extensions=0x7ffff7801aa0 <loader_extensions>,
driver_extensions=0x7ffff7425c40 <pvr_driver_extensions>, driver_configs=0x7fffffffdb60, data=0x5555556b00b0)
at ../../../../../../../../../drivers/imported/mesa/src/gallium/frontends/dri/dri_util.c:159
#9 0x00007ffff77d5bcb in dri3_create_screen (screen=0, priv=0x5555556aca80)
at ../../../../../../../../../drivers/imported/mesa/src/glx/dri3_glx.c:939
#10 0x00007ffff77be5c1 in AllocAndFetchScreenConfigs (dpy=0x555555699350, priv=0x5555556aca80)
at ../../../../../../../../../drivers/imported/mesa/src/glx/glxext.c:824
#11 0x00007ffff77be9c1 in __glXInitialize (dpy=0x555555699350)
at ../../../../../../../../../drivers/imported/mesa/src/glx/glxext.c:953
#12 0x00007ffff77b7d47 in glXQueryVersion (dpy=0x555555699350, major=0x7fffffffdca0, minor=0x7fffffffdca4)
at ../../../../../../../../../drivers/imported/mesa/src/glx/glxcmds.c:537
#13 0x0000555555578bee in glad_glx_find_core_glx ()
#14 0x0000555555578d41 in gladLoadGLXUserPtr ()
#15 0x000055555557379d in GLStateGLX::init_display (native_display=<optimized out>, visual_config=..., this=0x7fffffffddf0)
at ../src/gl-state-glx.cpp:44
#16 GLStateGLX::init_display (this=this@entry=0x7fffffffddf0, native_display=<optimized out>, visual_config=...)
at ../src/gl-state-glx.cpp:34
#17 0x000055555557151b in CanvasGeneric::init (this=0x7fffffffde60) at ../src/canvas-generic.cpp:43
#18 0x000055555556e558 in main (argc=<optimized out>, argv=<optimized out>) at ../src/main.cpp:210
(gdb) p hOSConnection //hOSConnection即PVRDRICreateScreen->OpenCloExec打开设备文件得到的fd值5
$2 = 5
3、后面多次调用PVRFDSyncOpen打开设备文件,因为PVRFDSyncOpen调用LinuxOpenServicesDevice时传入的hOSConnection为-1,所以每次都会打开设备文件,而不会调用dup.
(gdb) bt
#0 PVRFDSyncOpen (piSyncFd=0x7fffffffcd10, bSoftware=true, pszPrefix=0x7ffff723a0e7 "dmat") at common/linux_sync.c:108
#1 0x00007ffff6fa10b5 in PVRSRVSWTimelineCreateI (psDevConnection=0x555555726340, phSWTimeline=0x5555557377c0,
pszSWTimelineName=0x7ffff723a0e7 "dmat", pszaFile=0x7ffff7239c40 "include/pvrsrv_sync_um.h", ui32Line=177)
at services/client/env/linux/pvrsrv_sync_fdsync.c:447
#2 0x00007ffff6f5896f in PVRSRVSWTimelineCreate (psDevConnection=0x555555726340, phSWTimeline=0x5555557377c0,
pszSWTimelineName=0x7ffff723a0e7 "dmat") at include/pvrsrv_sync_um.h:176
#3 0x00007ffff6f59d23 in PVRSRVCreateDmaTransferContext (psDevConnection=0x555555726340, ppsCtx=0x5555556d2fa0)
at services/client/common/dma.c:678
#4 0x00007ffff7354379 in SRV_RGXServicesInit (psSysContext=0x5555556d2ec0, psDevConnection=0x555555726340,
psAppHints=0x7ffff73d6850 <sGlobalData+7056>) at egl/imgeglsup/volcanic/srv_rgx.c:454
#5 0x00007ffff7351bdb in SRV_ServicesInit (psSysContext=0x5555556d2ec0, psDevConnection=0x555555726340,
psAppHints=0x7ffff73d6850 <sGlobalData+7056>) at egl/imgeglsup/srv.c:33
#6 0x00007ffff7332a6a in PVRDRICreateScreen (iFd=4, iDisplayFd=4) at lws/pvr_dri_support/pvrscreen_impl.c:308
#7 0x00007ffff73348a8 in DRIMODCreateScreen (psDRIScreen=0x5555556ceac0, iFD=4, bUseInvalidate=true, pvLoaderPrivate=0x5555556b00b0,
pppsConfigs=0x7fffffffd080, piMaxGLES1Version=0x7fffffffd078, piMaxGLES2Version=0x7fffffffd07c)
at lws/pvr_dri_support/pvrdri_mod.c:199
#8 0x00007ffff73edb2b in DRISUPCreateScreen (psDRIScreen=0x5555556ceac0, iFD=4, bUseInvalidate=true, pvLoaderPrivate=0x5555556b00b0,
pppsConfigs=0x7fffffffd080, piMaxGLES1Version=0x7fffffffd078, piMaxGLES2Version=0x7fffffffd07c)
at ../../../../../../../../../drivers/imported/mesa/src/gallium/frontends/pvr/pvrcompat.c:294
#9 0x00007ffff73eabbb in PVRDRIInitScreen (psDRIScreen=0x5555556ceac0)
at ../../../../../../../../../drivers/imported/mesa/src/gallium/frontends/pvr/pvrdri.c:216
#10 0x00007ffff73dd03e in driCreateNewScreen2 (scrn=0, fd=4, extensions=0x7ffff7801aa0 <loader_extensions>,
driver_extensions=0x7ffff7425c40 <pvr_driver_extensions>, driver_configs=0x7fffffffdb60, data=0x5555556b00b0)
at ../../../../../../../../../drivers/imported/mesa/src/gallium/frontends/dri/dri_util.c:159
#11 0x00007ffff77d5bcb in dri3_create_screen (screen=0, priv=0x5555556aca80)
at ../../../../../../../../../drivers/imported/mesa/src/glx/dri3_glx.c:939
#12 0x00007ffff77be5c1 in AllocAndFetchScreenConfigs (dpy=0x555555699350, priv=0x5555556aca80)
at ../../../../../../../../../drivers/imported/mesa/src/glx/glxext.c:824
#13 0x00007ffff77be9c1 in __glXInitialize (dpy=0x555555699350)
at ../../../../../../../../../drivers/imported/mesa/src/glx/glxext.c:953
#14 0x00007ffff77b7d47 in glXQueryVersion (dpy=0x555555699350, major=0x7fffffffdca0, minor=0x7fffffffdca4)
at ../../../../../../../../../drivers/imported/mesa/src/glx/glxcmds.c:537
#15 0x0000555555578bee in glad_glx_find_core_glx ()
#16 0x0000555555578d41 in gladLoadGLXUserPtr ()
#17 0x000055555557379d in GLStateGLX::init_display (native_display=<optimized out>, visual_config=..., this=0x7fffffffddf0)
at ../src/gl-state-glx.cpp:44
#18 GLStateGLX::init_display (this=this@entry=0x7fffffffddf0, native_display=<optimized out>, visual_config=...)
at ../src/gl-state-glx.cpp:34
#19 0x000055555557151b in CanvasGeneric::init (this=0x7fffffffde60) at ../src/canvas-generic.cpp:43
#20 0x000055555556e558 in main (argc=<optimized out>, argv=<optimized out>) at ../src/main.cpp:210
代码
UMD相关代码如下:
/*
 * UMD excerpt (parts elided with dots).
 * Obtains a device fd through LinuxOpenServicesDevice() using the caller's
 * hOSConnection and iDeviceID. When PVRSRV_DEVICE_INIT_MODE is
 * PVRSRV_LINUX_DEV_INIT_ON_CONNECT, it then issues DRM_IOCTL_PVR_SRVKM_INIT
 * on that fd with init_module set to PVR_SRVKM_SERVICES_INIT.
 */
PVRSRV_ERROR IMG_INTERNAL OpenServicesDevice(IMG_OS_CONNECTION hOSConnection,
IMG_INT iDeviceID,
......)
{
......
eError = LinuxOpenServicesDevice(hOSConnection, iDeviceID, &iFd, &psDeviceID->iDeviceID);
......
#if (PVRSRV_DEVICE_INIT_MODE == PVRSRV_LINUX_DEV_INIT_ON_CONNECT)
/* Call ioctl to initialise the Services device */
struct drm_pvr_srvkm_init_data sInitData;
sInitData.init_module = PVR_SRVKM_SERVICES_INIT;
if (ioctl(iFd, DRM_IOCTL_PVR_SRVKM_INIT, &sInitData) != 0)
{
.....
}
#endif
.....
}
/*
 * UMD excerpt (parts elided with dots).
 * Delegates the choice of file descriptor to _GetFd(), forwarding
 * hOSConnection and the device minor.
 */
PVRSRV_ERROR IMG_INTERNAL LinuxOpenServicesDevice(IMG_OS_CONNECTION hOSConnection,
......)
{
.....
eError = _GetFd(hOSConnection, iDeviceMinor, &iFd, &iDeviceID);
.....
}
/*
 * UMD excerpt (parts elided with dots).
 * Chooses the fd used for a connection, in this priority order:
 *   1. hOSConnection >= 0  -> dup() the caller-supplied fd;
 *   2. iDeviceMinor != -1  -> PVRDRMOpenRender() on that minor;
 *   3. otherwise           -> PVRDRMOpenRender() with PVR_DRM_MINOR_RENDER_START.
 */
static PVRSRV_ERROR _GetFd(IMG_OS_CONNECTION hOSConnection,
.....)
{
IMG_INT iFd;
IMG_INT iFoundDeviceMinor = -1;
/* prefer hOSConnection over a particular device */
if (hOSConnection >= 0)
{
/* Duplicates the existing descriptor; the device node is not opened again on this path. */
iFd = dup(hOSConnection);
.......
}
/* then a particular device */
else if (-1 != iDeviceMinor)
{
iFd = PVRDRMOpenRender(iDeviceMinor, &iFoundDeviceMinor); // eventually ends up in OpenCloExec
.......
}
else
{
iFd = PVRDRMOpenRender(PVR_DRM_MINOR_RENDER_START, &iFoundDeviceMinor);
......
}
......
}
/*
 * UMD excerpt (parts elided with dots).
 * Opens pszPath read/write. When O_CLOEXEC is defined, open() is tried with
 * O_CLOEXEC first and OpenCloExecFallback() runs only if that open() failed
 * with EINVAL. When O_CLOEXEC is not defined, the fallback is always used.
 */
static int OpenCloExec(const char *pszPath)
{
....
#ifdef O_CLOEXEC
iFd = open(pszPath, O_RDWR | O_CLOEXEC, 0);
if (iFd == -1 && errno == EINVAL)
#endif
{
iFd = OpenCloExecFallback(pszPath);
}
......
}
/*
 * UMD excerpt (parts elided with dots; the end of the function is not shown).
 * Obtains a sync fd and initialises it. Without USE_PVRSYNC_DEVNODE it calls
 * LinuxOpenServicesDevice() with -1 for both the connection and the device,
 * so no existing fd is passed in for _GetFd() to dup(). It then issues
 * DRM_IOCTL_PVR_SRVKM_INIT with init_module = PVR_SRVKM_SYNC_INIT on the
 * resulting fd.
 */
PVRSRV_ERROR PVRFDSyncOpen(int *piSyncFd, bool bSoftware,
const char *pszPrefix)
{
.....
#if defined(USE_PVRSYNC_DEVNODE)
eError = OpenPvrSyncDev(&iSyncFd, &iDeviceFd);
#else
eError = LinuxOpenServicesDevice(-1, -1, &iSyncFd, &iDeviceFd);
#endif
if (eError != PVRSRV_OK)
{
......
}
#if !defined(USE_PVRSYNC_DEVNODE)
/* Call ioctl to initialise the SYNC device */
struct drm_pvr_srvkm_init_data sInitData;
sInitData.init_module = PVR_SRVKM_SYNC_INIT;
if (ioctl(iSyncFd, DRM_IOCTL_PVR_SRVKM_INIT, &sInitData) != 0)
{
....
}
#endif
KMD相关代码如下:
------------------------打开设备文件会调用该函数------------------------
/*
 * KMD excerpt (error labels and tail elided with dots).
 * Per-open setup for a DRM file:
 *   - clears file_priv->driver_priv;
 *   - unless NO_HARDWARE is defined, takes a runtime-PM reference;
 *   - allocates the per-file xdx_drm_fpriv and links it to file_priv;
 *   - registers file_priv in xdev->connection_idr under connection_mutex
 *     and stores the returned id in fpriv->connection_id;
 *   - calls xdx_gfx_drm_open() and, on success, publishes fpriv through
 *     file_priv->driver_priv.
 * The success path jumps to out_suspend, which is not visible here.
 */
int xdx_drm_open(struct drm_device *ddev, struct drm_file *file_priv)
{
struct xdx_device *xdev = drm_to_xdev(ddev);
struct xdx_drm_fpriv *fpriv;
int ret;
file_priv->driver_priv = NULL;
#ifndef NO_HARDWARE
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0) {
dev_err(xdev->dev, "failed to pm sync (%d)\n", ret);
goto pm_put;
}
#endif
fpriv = kzalloc(sizeof(*fpriv), GFP_KERNEL);
if (unlikely(!fpriv)) {
ret = -ENOMEM;
goto out_suspend;
}
fpriv->dfile = file_priv;
mutex_lock(&xdev->connection_mutex);
/* Ids are requested starting from 1. */
ret = idr_alloc(&xdev->connection_idr, file_priv,
1, 0, GFP_KERNEL);
mutex_unlock(&xdev->connection_mutex);
if (ret < 0) {
dev_err(xdev->dev, "failed to alloc idr (%d)\n", ret);
goto err_idr;
}
fpriv->connection_id = ret;
ret = xdx_gfx_drm_open(xdev, fpriv);
if (ret) {
dev_err(xdev->dev, "gfx: failed to open device node (%d)\n", ret);
goto err_gfx;
}
/* Only published after every step succeeded. */
file_priv->driver_priv = fpriv;
goto out_suspend;
.......
}
/*
 * Graphics-side hook run when a DRM file is opened.
 *
 * Unless PVRSRV_DEVICE_INIT_MODE is PVRSRV_LINUX_DEV_INIT_ON_CONNECT, the
 * Services connection is created here through PVRSRVDeviceServicesOpen().
 * In ON_CONNECT mode nothing is done at open time.
 *
 * Returns 0 on success (always 0 in ON_CONNECT mode), otherwise the value
 * returned by PVRSRVDeviceServicesOpen().
 */
int xdx_gfx_drm_open(struct xdx_device *xdev, struct xdx_drm_fpriv *fpriv)
{
    /*
     * Must start at 0: in ON_CONNECT mode the assignment below is compiled
     * out, and the caller treats any non-zero value as a failed open.
     */
    int ret = 0;

#if (PVRSRV_DEVICE_INIT_MODE != PVRSRV_LINUX_DEV_INIT_ON_CONNECT)
    ret = PVRSRVDeviceServicesOpen(xdev->gfx.pvrdev, fpriv);
#endif
    return ret;
}
------------------------UMD ioctl最终KMD会调用该函数------------------------
/*
 * KMD excerpt (parts elided with dots).
 * Handler for the init ioctl issued by the UMD. Dispatches on
 * data->init_module:
 *   PVR_SRVKM_SYNC_INIT     -> PVRSRVDeviceSyncOpen()
 *   PVR_SRVKM_SERVICES_INIT -> PVRSRVDeviceServicesOpen()
 * and records the callee's result in iErr.
 *
 * NOTE(review): psDRMFile is a struct drm_file * here, while both callees are
 * shown in this document taking a struct xdx_drm_fpriv *. Presumably a
 * conversion was elided from the excerpt; confirm against the real source.
 */
int drm_pvr_srvkm_init(struct drm_device *dev, void *arg, struct drm_file *psDRMFile)
{
struct drm_pvr_srvkm_init_data *data = arg;
struct pvr_drm_private *priv = dev->dev_private;
int iErr = 0;
switch (data->init_module)
{
case PVR_SRVKM_SYNC_INIT:
{
iErr = PVRSRVDeviceSyncOpen(priv->dev_node, psDRMFile);
break;
}
case PVR_SRVKM_SERVICES_INIT:
{
iErr = PVRSRVDeviceServicesOpen(priv->dev_node, psDRMFile);
break;
}
......
}
......
}
/*
 * KMD excerpt (parts elided with dots; error labels not shown).
 * Creates the Services connection for one open file:
 *   - reuses fpriv->pvr_fpriv if present, otherwise allocates a new
 *     PVRSRV_CONNECTION_PRIV;
 *   - under sDeviceInitMutex, runs PVRSRVCommonDeviceInitialise() if the
 *     device node is still in PVRSRV_DEVICE_STATE_INIT;
 *   - calls PVRSRVCommonConnectionConnect(), which fills
 *     psConnectionPriv->pvConnectionData;
 *   - in ON_CONNECT mode records PVRSRVCommonConnectionDisconnect as the
 *     release callback;
 *   - stores the private data back into fpriv->pvr_fpriv.
 * Any failure of PVRSRVCommonConnectionConnect() is reported as -ENOMEM.
 */
static int PVRSRVDeviceServicesOpen(PVRSRV_DEVICE_NODE *psDeviceNode,
struct xdx_drm_fpriv *fpriv)
{
static DEFINE_MUTEX(sDeviceInitMutex);
PVRSRV_DATA *psPVRSRVData = PVRSRVGetPVRSRVData();
ENV_CONNECTION_PRIVATE_DATA sPrivData;
PVRSRV_CONNECTION_PRIV *psConnectionPriv;
PVRSRV_ERROR eError;
int iErr = 0;
if (!psPVRSRVData)
......
mutex_lock(&sDeviceInitMutex);
......
if (fpriv->pvr_fpriv == NULL)
{
psConnectionPriv = kzalloc(sizeof(*psConnectionPriv), GFP_KERNEL);
if (!psConnectionPriv)
{
PVR_DPF((PVR_DBG_ERROR, "%s: No memory to allocate driver_priv data", __func__));
iErr = -ENOMEM;
mutex_unlock(&sDeviceInitMutex);
goto fail_alloc_connection_priv;
}
}
else
{
/* A previous init on this file already created the private data. */
psConnectionPriv = (PVRSRV_CONNECTION_PRIV*)fpriv->pvr_fpriv;
}
if (psDeviceNode->eDevState == PVRSRV_DEVICE_STATE_INIT)
{
eError = PVRSRVCommonDeviceInitialise(psDeviceNode);
if (eError != PVRSRV_OK)
{
......
}
#if defined(SUPPORT_RGX)
PVRGpuTraceInitIfEnabled(psDeviceNode);
#endif
}
mutex_unlock(&sDeviceInitMutex);
sPrivData.psDevNode = psDeviceNode;
eError = PVRSRVCommonConnectionConnect(&psConnectionPriv->pvConnectionData,
(void *)&sPrivData);
if (eError != PVRSRV_OK)
{
iErr = -ENOMEM;
goto fail_connect;
}
#if (PVRSRV_DEVICE_INIT_MODE == PVRSRV_LINUX_DEV_INIT_ON_CONNECT)
psConnectionPriv->pfDeviceRelease = PVRSRVCommonConnectionDisconnect;
#endif
fpriv->pvr_fpriv = (void*)psConnectionPriv;
goto out;
....
}
/*
 * KMD excerpt (error labels elided with dots). This is the pre-fix code that
 * the analysis in this document refers to.
 *
 * Creates the sync connection for one open file:
 *   - reuses fpriv->pvr_fpriv if present, otherwise allocates a new
 *     PVRSRV_CONNECTION_PRIV;
 *   - allocates a CONNECTION_DATA with kzalloc() and stores it in
 *     pvConnectionData (ON_CONNECT mode) or pvSyncConnectionData (otherwise);
 *   - initialises the OS private data via OSConnectionPrivateDataInit();
 *   - with SUPPORT_NATIVE_FENCE_SYNC and without USE_PVRSYNC_DEVNODE, calls
 *     pvr_sync_open() and, in ON_CONNECT mode, records pvr_sync_close as the
 *     release callback.
 *
 * Per the analysis in this document, pvr_sync_close() does not free the
 * CONNECTION_DATA allocated here, which is the reported leak.
 *
 * NOTE(review): in ON_CONNECT mode pvConnectionData and pfDeviceRelease are
 * assigned unconditionally, so an existing pvr_fpriv from an earlier Services
 * init on the same file would be overwritten. Confirm the UMD never issues
 * both inits on one file.
 */
static int PVRSRVDeviceSyncOpen(PVRSRV_DEVICE_NODE *psDeviceNode,
struct xdx_drm_fpriv *fpriv)
{
PVRSRV_DATA *psPVRSRVData = PVRSRVGetPVRSRVData();
CONNECTION_DATA *psConnection = NULL;
ENV_CONNECTION_PRIVATE_DATA sPrivData;
PVRSRV_CONNECTION_PRIV *psConnectionPriv;
PVRSRV_ERROR eError;
int iErr = 0;
if (!psPVRSRVData)
{
PVR_DPF((PVR_DBG_ERROR, "%s: No device data", __func__));
iErr = -ENODEV;
goto out;
}
if (fpriv->pvr_fpriv == NULL)
{
/* Allocate psConnectionPriv (stores private data and release pfn under driver_priv) */
psConnectionPriv = kzalloc(sizeof(*psConnectionPriv), GFP_KERNEL);
if (!psConnectionPriv)
{
PVR_DPF((PVR_DBG_ERROR, "%s: No memory to allocate driver_priv data", __func__));
iErr = -ENOMEM;
goto out;
}
}
else
{
psConnectionPriv = (PVRSRV_CONNECTION_PRIV*)fpriv->pvr_fpriv;
}
/* Allocate connection data area, no stats since process not registered yet */
psConnection = kzalloc(sizeof(*psConnection), GFP_KERNEL);
if (!psConnection)
{
PVR_DPF((PVR_DBG_ERROR, "%s: No memory to allocate connection data", __func__));
iErr = -ENOMEM;
goto fail_alloc_connection;
}
/* The slot that owns psConnection depends on the init mode. */
#if (PVRSRV_DEVICE_INIT_MODE == PVRSRV_LINUX_DEV_INIT_ON_CONNECT)
psConnectionPriv->pvConnectionData = (void*)psConnection;
#else
psConnectionPriv->pvSyncConnectionData = (void*)psConnection;
#endif
sPrivData.psDevNode = psDeviceNode;
/* Call environment specific connection data init function */
eError = OSConnectionPrivateDataInit(&psConnection->hOsPrivateData, &sPrivData);
if (eError != PVRSRV_OK)
{
PVR_DPF((PVR_DBG_ERROR, "%s: OSConnectionPrivateDataInit() failed (%s)",
__func__, PVRSRVGetErrorString(eError)));
goto fail_private_data_init;
}
#if defined(SUPPORT_NATIVE_FENCE_SYNC) && !defined(USE_PVRSYNC_DEVNODE)
#if (PVRSRV_DEVICE_INIT_MODE == PVRSRV_LINUX_DEV_INIT_ON_CONNECT)
iErr = pvr_sync_open(psConnectionPriv->pvConnectionData, fpriv->dfile);
#else
iErr = pvr_sync_open(psConnectionPriv->pvSyncConnectionData, fpriv->dfile);
#endif
if (iErr)
{
PVR_DPF((PVR_DBG_ERROR, "%s: pvr_sync_open() failed(%d)",
__func__, iErr));
goto fail_pvr_sync_open;
}
#endif
#if defined(SUPPORT_NATIVE_FENCE_SYNC) && !defined(USE_PVRSYNC_DEVNODE)
#if (PVRSRV_DEVICE_INIT_MODE == PVRSRV_LINUX_DEV_INIT_ON_CONNECT)
/* Release callback later invoked by PVRSRVDeviceRelease() with pvConnectionData. */
psConnectionPriv->pfDeviceRelease = pvr_sync_close;
#endif
#endif
fpriv->pvr_fpriv = psConnectionPriv;
goto out;
......
}
--------------------关闭文件,文件引用计数减为0时会调用该KMD函数--------------------
/*
 * Per-file teardown, the counterpart of xdx_drm_open().
 *
 * Removes this file's id from xdev->connection_idr under connection_mutex,
 * then hands the per-file private data to xdx_gfx_drm_release().
 *
 * NOTE(review): the xdx_drm_fpriv itself is not freed in this function;
 * confirm it is released elsewhere.
 */
void xdx_drm_postclose(struct drm_device *ddev, struct drm_file *file_priv)
{
    struct xdx_drm_fpriv *priv = file_priv->driver_priv;
    struct xdx_device *xdx = drm_to_xdev(ddev);

    /* Drop the connection id first so it can no longer be looked up. */
    mutex_lock(&xdx->connection_mutex);
    idr_remove(&xdx->connection_idr, priv->connection_id);
    mutex_unlock(&xdx->connection_mutex);

    xdx_gfx_drm_release(xdx, priv);
}
/*
 * Graphics-side release hook: forwards the per-file private data to
 * PVRSRVDeviceRelease() together with the device node stored in
 * xdev->gfx.pvrdev.
 */
void xdx_gfx_drm_release(struct xdx_device *xdev, struct xdx_drm_fpriv *fpriv)
{
PVRSRVDeviceRelease(xdev->gfx.pvrdev, fpriv);
}
/*
 * Tears down whatever connections were attached to this open file. This is
 * the pre-fix code that the analysis in this document refers to.
 *
 * Does nothing when fpriv->pvr_fpriv is NULL. Otherwise, and only when
 * pvConnectionData is set:
 *   - ON_CONNECT mode: invokes the recorded pfDeviceRelease callback on
 *     pvConnectionData, if one was recorded;
 *   - other modes: calls PVRSRVCommonConnectionDisconnect() on
 *     pvConnectionData and, with SUPPORT_NATIVE_FENCE_SYNC and without
 *     USE_PVRSYNC_DEVNODE, pvr_sync_close() on pvSyncConnectionData.
 * Finally frees the PVRSRV_CONNECTION_PRIV and clears fpriv->pvr_fpriv.
 *
 * Nothing in this function frees the CONNECTION_DATA that
 * PVRSRVDeviceSyncOpen() allocated. Per the analysis in this document,
 * pvr_sync_close() does not free it either, so it leaks.
 */
static void PVRSRVDeviceRelease(PVRSRV_DEVICE_NODE *psDeviceNode,
struct xdx_drm_fpriv *fpriv)
{
PVR_UNREFERENCED_PARAMETER(psDeviceNode);
if (fpriv->pvr_fpriv)
{
PVRSRV_CONNECTION_PRIV *psConnectionPriv = (PVRSRV_CONNECTION_PRIV*)fpriv->pvr_fpriv;
if (psConnectionPriv->pvConnectionData)
{
#if (PVRSRV_DEVICE_INIT_MODE == PVRSRV_LINUX_DEV_INIT_ON_CONNECT)
if (psConnectionPriv->pfDeviceRelease)
{
psConnectionPriv->pfDeviceRelease(psConnectionPriv->pvConnectionData);
}
#else
/* Redundant with the enclosing check; kept as in the original source. */
if (psConnectionPriv->pvConnectionData)
PVRSRVCommonConnectionDisconnect(psConnectionPriv->pvConnectionData);
#if defined(SUPPORT_NATIVE_FENCE_SYNC) && !defined(USE_PVRSYNC_DEVNODE)
if (psConnectionPriv->pvSyncConnectionData)
pvr_sync_close(psConnectionPriv->pvSyncConnectionData);
#endif
#endif
}
/* Frees only the PVRSRV_CONNECTION_PRIV container. */
kfree(fpriv->pvr_fpriv);
fpriv->pvr_fpriv = NULL;
}
}
代码流程
当PVRSRV_DEVICE_INIT_MODE设置为PVRSRV_LINUX_DEV_INIT_ON_OPEN时
打开设备文件流程如下:
1、UMD第一次调用OpenCloExec打开设备文件,KMD调用函数PVRSRVDeviceServicesOpen->PVRSRVCommonConnectionConnect,最终调用kzalloc从slab中分配CONNECTION_DATA,并将指针赋值给pvConnectionData,而CONNECTION_DATA的大小接近256字节,会从kmalloc-256中分配。pfDeviceRelease不会进行设置,所以为NULL.
2、UMD调用OpenServicesDevice->LinuxOpenServicesDevice,最终调用dup,参数fd为第1步打开设备文件返回的fd。
3、UMD调用PVRFDSyncOpen->LinuxOpenServicesDevice打开设备文件,KMD调用函数PVRSRVDeviceServicesOpen->PVRSRVCommonConnectionConnect,最终调用kzalloc从slab中分配CONNECTION_DATA,并将指针赋值给pvConnectionData。pfDeviceRelease不会进行设置,所以为NULL.
4、接着PVRFDSyncOpen对第3步得到的fd调用ioctl,KMD调用drm_pvr_srvkm_init->PVRSRVDeviceSyncOpen->kzalloc分配CONNECTION_DATA,并将指针赋值给pvSyncConnectionData。pfDeviceRelease未设置,所以为NULL。
关闭文件流程如下:
1、对于第一次打开的设备文件并进行了dup操作,当进行两次close后,内核file引用计数变为0,进而KMD调用xdx_drm_postclose->xdx_gfx_drm_release->PVRSRVDeviceRelease->(pvConnectionData不为NULL)PVRSRVCommonConnectionDisconnect最终调用kfree释放CONNECTION_DATA.
2、对PVRFDSyncOpen打开的设备文件进行关闭操作,KMD调用xdx_drm_postclose->xdx_gfx_drm_release->PVRSRVDeviceRelease->(pvConnectionData不为NULL)PVRSRVCommonConnectionDisconnect、(pvSyncConnectionData不为NULL)pvr_sync_close。PVRSRVCommonConnectionDisconnect会释放PVRSRVDeviceServicesOpen创建的CONNECTION_DATA,而pvr_sync_close并不会释放函数PVRSRVDeviceSyncOpen创建的CONNECTION_DATA,故而造成内存泄露。
当PVRSRV_DEVICE_INIT_MODE设置为PVRSRV_LINUX_DEV_INIT_ON_CONNECT时
打开设备文件流程如下:
1、UMD第一次调用OpenCloExec打开设备文件,KMD不会调用PVRSRVDeviceServicesOpen。
2、UMD调用OpenServicesDevice->LinuxOpenServicesDevice,最终调用dup,参数fd为第1步打开设备文件返回的fd。
3、接着OpenServicesDevice->ioctl,KMD调用drm_pvr_srvkm_init->PVRSRVDeviceServicesOpen分配CONNECTION_DATA,并将指针赋值给pvConnectionData,pfDeviceRelease设置为PVRSRVCommonConnectionDisconnect。
4、UMD调用PVRFDSyncOpen->LinuxOpenServicesDevice打开设备文件,KMD不会调用PVRSRVDeviceServicesOpen。
5、接着PVRFDSyncOpen对第4步得到的fd调用ioctl,KMD调用drm_pvr_srvkm_init->PVRSRVDeviceSyncOpen->kzalloc分配CONNECTION_DATA,并将指针赋值给pvConnectionData,pfDeviceRelease设置为pvr_sync_close。
关闭文件流程如下:
1、对于第一次打开的设备文件并进行了dup操作,当进行两次close后,内核file引用计数变为0,进而KMD调用xdx_drm_postclose->xdx_gfx_drm_release->PVRSRVDeviceRelease->pfDeviceRelease即函数PVRSRVCommonConnectionDisconnect最终调用kfree释放CONNECTION_DATA.
2、对PVRFDSyncOpen打开的设备文件进行关闭操作,KMD调用xdx_drm_postclose->xdx_gfx_drm_release->PVRSRVDeviceRelease->pfDeviceRelease即函数pvr_sync_close,pvr_sync_close并不会释放函数PVRSRVDeviceSyncOpen创建的CONNECTION_DATA,故而造成内存泄露。
三、修改方法
KMD修改代码如下,UMD只是将PVRSRV_DEVICE_INIT_MODE从PVRSRV_LINUX_DEV_INIT_ON_CONNECT修改为PVRSRV_LINUX_DEV_INIT_ON_OPEN所以省略:
diff --git a/include/drm/config_kernel.h b/include/drm/config_kernel.h
index 71307e0..6389129 100644
--- a/include/drm/config_kernel.h
+++ b/include/drm/config_kernel.h
@@ -130,7 +130,7 @@
#define PVR_GPIO_MODE PVR_GPIO_MODE_GENERAL
#define PVRSRV_ENABLE_PROCESS_STATS
#define PVR_ANNOTATION_MAX_LEN 63
-#define PVRSRV_DEVICE_INIT_MODE PVRSRV_LINUX_DEV_INIT_ON_CONNECT
+#define PVRSRV_DEVICE_INIT_MODE PVRSRV_LINUX_DEV_INIT_ON_OPEN
#define PVR_LINUX_PHYSMEM_MAX_POOL_PAGES 10240
#define PVR_LINUX_PHYSMEM_MAX_EXCESS_POOL_PAGES 20480
#define PVR_PMR_TRANSLATE_UMA_ADDRESSES
diff --git a/xdxgpu/plateform/pvr_device.c b/xdxgpu/plateform/pvr_device.c
index 57112b9..e1b0803 100644
--- a/xdxgpu/plateform/pvr_device.c
+++ b/xdxgpu/plateform/pvr_device.c
@@ -302,6 +302,17 @@ out:
return iErr;
}
+static void PVRSRVSyncConnectionRelease(void *pvDataPtr)
+{
+ CONNECTION_DATA *psConnection = pvDataPtr;
+
+ pvr_sync_close(psConnection);
+ OSConnectionPrivateDataDeInit(psConnection->hOsPrivateData);
+ kfree(psConnection);
+}
+
static int PVRSRVDeviceSyncOpen(PVRSRV_DEVICE_NODE *psDeviceNode,
struct xdx_drm_fpriv *fpriv)
{
@@ -343,6 +354,7 @@ static int PVRSRVDeviceSyncOpen(PVRSRV_DEVICE_NODE *psDeviceNode,
iErr = -ENOMEM;
goto fail_alloc_connection;
}
+
#if (PVRSRV_DEVICE_INIT_MODE == PVRSRV_LINUX_DEV_INIT_ON_CONNECT)
psConnectionPriv->pvConnectionData = (void*)psConnection;
#else
@@ -376,7 +388,7 @@ static int PVRSRVDeviceSyncOpen(PVRSRV_DEVICE_NODE *psDeviceNode,
#if defined(SUPPORT_NATIVE_FENCE_SYNC) && !defined(USE_PVRSYNC_DEVNODE)
#if (PVRSRV_DEVICE_INIT_MODE == PVRSRV_LINUX_DEV_INIT_ON_CONNECT)
- psConnectionPriv->pfDeviceRelease = pvr_sync_close;
+ psConnectionPriv->pfDeviceRelease = PVRSRVSyncConnectionRelease;
#endif
#endif
fpriv->pvr_fpriv = psConnectionPriv;
@@ -433,7 +446,7 @@ static void PVRSRVDeviceRelease(PVRSRV_DEVICE_NODE *psDeviceNode,
- if (psConnectionPriv->pvConnectionData) //该修改不是必须的,因为psConnectionPriv->pvConnectionData必然不为NULL
+ if (psConnectionPriv->pvConnectionData || psConnectionPriv->pvSyncConnectionData)
{
#if defined(SUPPORT_NATIVE_FENCE_SYNC) && !defined(USE_PVRSYNC_DEVNODE)
if (psConnectionPriv->pvSyncConnectionData)
- pvr_sync_close(psConnectionPriv->pvSyncConnectionData);
+ PVRSRVSyncConnectionRelease(psConnectionPriv->pvSyncConnectionData);
#endif
#endif
}
通过函数PVRSRVSyncConnectionRelease将pvr_sync_close和释放CONNECTION_DATA等数据进行封装,可以解决该内存泄露问题,修改后验证发现该内存泄露已解决.
这样修改后将PVRSRV_DEVICE_INIT_MODE设置为PVRSRV_LINUX_DEV_INIT_ON_CONNECT或PVRSRV_LINUX_DEV_INIT_ON_OPEN模式都没问题,之所以将PVRSRV_DEVICE_INIT_MODE修改为PVRSRV_LINUX_DEV_INIT_ON_OPEN是因为最开始没分析UMD调用逻辑,所以认为PVRSRV_LINUX_DEV_INIT_ON_CONNECT模式有很多问题.
四、总结
除了内核的内存泄露,进程的用户态也可能发生内存泄露,例如malloc的内存未释放,但是用户态的内存泄露通过杀掉进程就可以被回收,因为进程退出时内核可以回收进程申请的内存。进程的内存使用情况可以通过命令cat /proc/<pid>/status进行查看。