xgltest1退出后kernel crash定位

一、问题描述

启动Xorg后,运行xgltest1一段时间然后ctrl+C结束xgltest1,出现内核崩溃,主机重启。内核版本为 linux-5.4.0-42。

二、问题分析

2.1 崩溃日志分析

查看崩溃时的日志如下:

Oct 13 10:57:41 FPGA-test kernel: [   61.945744] PVR_K:(Error):  2477: SyncCheckpointGetFirmwareAddr called for psSyncCheckpoint<00000000e8b901ab>, but ui32ValidationCheck=0x2b2bb [1745]
Oct 13 10:57:41 FPGA-test kernel: [   61.945771] BUG: kernel NULL pointer dereference, address: 0000000000000000
Oct 13 10:57:41 FPGA-test kernel: [   61.945771] #PF: supervisor read access in kernel mode
Oct 13 10:57:41 FPGA-test kernel: [   61.945772] #PF: error_code(0x0000) - not-present page
Oct 13 10:57:41 FPGA-test kernel: [   61.945772] PGD 0 P4D 0 
Oct 13 10:57:41 FPGA-test kernel: [   61.945773] Oops: 0000 [#1] SMP NOPTI
Oct 13 10:57:41 FPGA-test kernel: [   61.945774] CPU: 4 PID: 2477 Comm: Xorg Tainted: G           OE     5.4.0-42-generic #46~18.04.1-Ubuntu
Oct 13 10:57:41 FPGA-test kernel: [   61.945775] Hardware name: ASUS System Product Name/PRIME Z590-P, BIOS 1017 07/12/2021
Oct 13 10:57:41 FPGA-test kernel: [   61.945785] RIP: 0010:RGXCmdHelperAcquireCmdCCB+0x265/0x370 [xdxgpu]
Oct 13 10:57:41 FPGA-test kernel: [   61.945786] Code: 48 83 c0 08 39 53 14 48 89 45 e0 0f 86 73 fe ff ff 48 8b 73 18 89 d1 8b 0c 8e 89 08 83 e1 01 75 d1 48 8b 4b 20 89 fe 83 c7 01 <8b> 0c b1 89 48 04 eb c7 b8 68 01 00 00 48 8b 7d e8 65 48 33 3c 25
Oct 13 10:57:41 FPGA-test kernel: [   61.945787] RSP: 0018:ffffba608339b650 EFLAGS: 00010202
Oct 13 10:57:41 FPGA-test kernel: [   61.945787] RAX: ffffba6080be8b80 RBX: ffff967af2a4a808 RCX: 0000000000000000
Oct 13 10:57:41 FPGA-test kernel: [   61.945788] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 0000000000000001
Oct 13 10:57:41 FPGA-test kernel: [   61.945788] RBP: ffffba608339b678 R08: 0000000000007fff R09: 0000000000008000
Oct 13 10:57:41 FPGA-test kernel: [   61.945788] R10: 0000000000007fff R11: 0000000000000002 R12: ffff967af2a4a898
Oct 13 10:57:41 FPGA-test kernel: [   61.945789] R13: ffff967af8ac3480 R14: ffffffffc0a81c91 R15: 0000000000000000
Oct 13 10:57:41 FPGA-test kernel: [   61.945789] FS:  00007f0b55e2fd00(0000) GS:ffff967aff300000(0000) knlGS:0000000000000000
Oct 13 10:57:41 FPGA-test kernel: [   61.945790] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
Oct 13 10:57:41 FPGA-test kernel: [   61.945790] CR2: 0000000000000000 CR3: 00000008365b0001 CR4: 0000000000760ee0
Oct 13 10:57:41 FPGA-test kernel: [   61.945791] PKRU: 55555554
Oct 13 10:57:41 FPGA-test kernel: [   61.945791] Call Trace:
Oct 13 10:57:41 FPGA-test kernel: [   61.945802]  PVRSRVRGXKickTA3DKM+0x177e/0x2320 [xdxgpu]
Oct 13 10:57:41 FPGA-test kernel: [   61.945805]  ? blk_rq_map_sg+0x334/0x6f0
Oct 13 10:57:41 FPGA-test kernel: [   61.945806]  ? __switch_to_asm+0x40/0x70
Oct 13 10:57:41 FPGA-test kernel: [   61.945807]  ? __switch_to_asm+0x34/0x70
Oct 13 10:57:41 FPGA-test kernel: [   61.945807]  ? __switch_to_asm+0x40/0x70
Oct 13 10:57:41 FPGA-test kernel: [   61.945808]  ? __switch_to_asm+0x34/0x70
Oct 13 10:57:41 FPGA-test kernel: [   61.945808]  ? __switch_to_asm+0x40/0x70
Oct 13 10:57:41 FPGA-test kernel: [   61.945809]  ? __switch_to_asm+0x34/0x70
Oct 13 10:57:41 FPGA-test kernel: [   61.945809]  ? __switch_to_asm+0x40/0x70
Oct 13 10:57:41 FPGA-test kernel: [   61.945810]  ? __switch_to_asm+0x34/0x70
Oct 13 10:57:41 FPGA-test kernel: [   61.945810]  ? __switch_to_asm+0x40/0x70
Oct 13 10:57:41 FPGA-test kernel: [   61.945810]  ? __switch_to_asm+0x34/0x70
Oct 13 10:57:41 FPGA-test kernel: [   61.945811]  ? __switch_to_asm+0x40/0x70
Oct 13 10:57:41 FPGA-test kernel: [   61.945811]  ? __switch_to_asm+0x34/0x70
Oct 13 10:57:41 FPGA-test kernel: [   61.945812]  ? __switch_to_asm+0x40/0x70
Oct 13 10:57:41 FPGA-test kernel: [   61.945812]  ? __switch_to_asm+0x34/0x70
Oct 13 10:57:41 FPGA-test kernel: [   61.945813]  ? __switch_to_asm+0x40/0x70
Oct 13 10:57:41 FPGA-test kernel: [   61.945813]  ? __switch_to_asm+0x34/0x70
Oct 13 10:57:41 FPGA-test kernel: [   61.945813]  ? __switch_to_asm+0x40/0x70
Oct 13 10:57:41 FPGA-test kernel: [   61.945814]  ? __switch_to_asm+0x34/0x70
Oct 13 10:57:41 FPGA-test kernel: [   61.945814]  ? __switch_to_asm+0x40/0x70
Oct 13 10:57:41 FPGA-test kernel: [   61.945825]  PVRSRVBridgeRGXKickTA3D2+0xf9b/0xfd0 [xdxgpu]
Oct 13 10:57:41 FPGA-test kernel: [   61.945834]  BridgedDispatchKM+0x351/0x4e0 [xdxgpu]
Oct 13 10:57:41 FPGA-test kernel: [   61.945844]  PVRSRV_BridgeDispatchKM+0x92/0x120 [xdxgpu]
Oct 13 10:57:41 FPGA-test kernel: [   61.945853]  ? PVRSRVDriverThreadExit+0x30/0x30 [xdxgpu]
Oct 13 10:57:41 FPGA-test kernel: [   61.945858]  drm_ioctl_kernel+0xb0/0x100 [drm]
Oct 13 10:57:41 FPGA-test kernel: [   61.945862]  drm_ioctl+0x389/0x450 [drm]
Oct 13 10:57:41 FPGA-test kernel: [   61.945871]  ? PVRSRVDriverThreadExit+0x30/0x30 [xdxgpu]
Oct 13 10:57:41 FPGA-test kernel: [   61.945872]  ? __wake_up_common_lock+0x8c/0xc0
Oct 13 10:57:41 FPGA-test kernel: [   61.945879]  xdx_fops_ioctl+0x59/0xb0 [xdxgpu]
Oct 13 10:57:41 FPGA-test kernel: [   61.945880]  do_vfs_ioctl+0xa9/0x640
Oct 13 10:57:41 FPGA-test kernel: [   61.945881]  ? vfs_write+0x16a/0x1a0
Oct 13 10:57:41 FPGA-test kernel: [   61.945882]  ksys_ioctl+0x75/0x80
Oct 13 10:57:41 FPGA-test kernel: [   61.945883]  __x64_sys_ioctl+0x1a/0x20
Oct 13 10:57:41 FPGA-test kernel: [   61.945884]  do_syscall_64+0x57/0x190
Oct 13 10:57:41 FPGA-test kernel: [   61.945885]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
Oct 13 10:57:41 FPGA-test kernel: [   61.945886] RIP: 0033:0x7f0b53a10217
Oct 13 10:57:41 FPGA-test kernel: [   61.945886] Code: b3 66 90 48 8b 05 71 4c 2d 00 64 c7 00 26 00 00 00 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 b8 10 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 41 4c 2d 00 f7 d8 64 89 01 48
Oct 13 10:57:41 FPGA-test kernel: [   61.945887] RSP: 002b:00007fffc6980ed8 EFLAGS: 00000286 ORIG_RAX: 0000000000000010
Oct 13 10:57:41 FPGA-test kernel: [   61.945887] RAX: ffffffffffffffda RBX: 000000000000004d RCX: 00007f0b53a10217
Oct 13 10:57:41 FPGA-test kernel: [   61.945888] RDX: 00007fffc6980f20 RSI: 00000000c0206440 RDI: 000000000000000f
Oct 13 10:57:41 FPGA-test kernel: [   61.945888] RBP: 00007fffc6980f50 R08: 00007f0b4eed85d0 R09: 0000000000000082
Oct 13 10:57:41 FPGA-test kernel: [   61.945889] R10: 00005566ef1b97d0 R11: 0000000000000286 R12: 0000000000000000
Oct 13 10:57:41 FPGA-test kernel: [   61.945889] R13: 00000000000000e7 R14: 0000000000000008 R15: 00000000ffffffff
Oct 13 10:57:41 FPGA-test kernel: [   61.945890] Modules linked in: xdxgpu(OE) drm_kms_helper drm fb_sys_fops syscopyarea sysfillrect sysimgblt ipmi_devintf ipmi_msghandler rpcsec_gss_krb5 auth_rpcgss nfsv4 nfs lockd grace fscache snd_hda_codec_hdmi binfmt_misc x86_pkg_temp_thermal intel_powerclamp coretemp nls_iso8859_1 kvm snd_hda_codec_generic ledtrig_audio snd_hda_intel snd_intel_dspcfg snd_hda_codec snd_hda_core snd_hwdep crct10dif_pclmul snd_pcm crc32_pclmul snd_seq_midi ghash_clmulni_intel snd_seq_midi_event snd_rawmidi snd_seq snd_seq_device aesni_intel snd_timer eeepc_wmi crypto_simd asus_wmi cryptd input_leds joydev glue_helper snd sparse_keymap wmi_bmof soundcore acpi_pad acpi_tad mac_hid sch_fq_codel parport_pc ppdev lp sunrpc parport ip_tables x_tables autofs4 hid_generic usbhid hid ahci r8125(OE) libahci wmi video
Oct 13 10:57:41 FPGA-test kernel: [   61.945905] CR2: 0000000000000000
Oct 13 10:57:41 FPGA-test kernel: [   61.945906] ---[ end trace c2942d82781d12b6 ]---
Oct 13 10:57:41 FPGA-test kernel: [   61.945915] RIP: 0010:RGXCmdHelperAcquireCmdCCB+0x265/0x370 [xdxgpu]
Oct 13 10:57:41 FPGA-test kernel: [   61.945916] Code: 48 83 c0 08 39 53 14 48 89 45 e0 0f 86 73 fe ff ff 48 8b 73 18 89 d1 8b 0c 8e 89 08 83 e1 01 75 d1 48 8b 4b 20 89 fe 83 c7 01 <8b> 0c b1 89 48 04 eb c7 b8 68 01 00 00 48 8b 7d e8 65 48 33 3c 25
Oct 13 10:57:41 FPGA-test kernel: [   61.945916] RSP: 0018:ffffba608339b650 EFLAGS: 00010202
Oct 13 10:57:41 FPGA-test kernel: [   61.945917] RAX: ffffba6080be8b80 RBX: ffff967af2a4a808 RCX: 0000000000000000
Oct 13 10:57:41 FPGA-test kernel: [   61.945917] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 0000000000000001
Oct 13 10:57:41 FPGA-test kernel: [   61.945918] RBP: ffffba608339b678 R08: 0000000000007fff R09: 0000000000008000
Oct 13 10:57:41 FPGA-test kernel: [   61.945918] R10: 0000000000007fff R11: 0000000000000002 R12: ffff967af2a4a898
Oct 13 10:57:41 FPGA-test kernel: [   61.945919] R13: ffff967af8ac3480 R14: ffffffffc0a81c91 R15: 0000000000000000
Oct 13 10:57:41 FPGA-test kernel: [   61.945919] FS:  00007f0b55e2fd00(0000) GS:ffff967aff300000(0000) knlGS:0000000000000000
Oct 13 10:57:41 FPGA-test kernel: [   61.945919] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
Oct 13 10:57:41 FPGA-test kernel: [   61.945920] CR2: 0000000000000000 CR3: 00000008365b0001 CR4: 0000000000760ee0
Oct 13 10:57:41 FPGA-test kernel: [   61.945920] PKRU: 55555554

分析日志可知,内核在函数RGXCmdHelperAcquireCmdCCB中解引用了空指针(CR2为0)。由于之前比较旧的代码未出现该问题,所以怀疑是新提交的代码引入的问题。通过二分查找回退代码,最终定位到是某个commit引入的问题,而该commit修改的代码量较多,经过不断排查,发现如下修改会引入问题:

PhysmemExportPMRToDmaBuf(PVRSRV_DEVICE_NODE *psDevNode, PMR *psPMR,
	    struct dma_resv *resv, int flags,
                    struct dma_buf **ppDmaBuf)
{
	struct dma_buf *psDmaBuf;
	IMG_DEVMEM_SIZE_T uiPMRSize;
	PVRSRV_ERROR eError;
	PVRDmaBufPrivData *priv;

	priv = OSAllocZMem(sizeof(*priv));
	if (priv == NULL)
		return PVRSRV_ERROR_OUT_OF_MEMORY;

	priv->psPMR = psPMR;

	mutex_lock(&g_HashLock);

	PMRRefPMR(psPMR);

	PMR_LogicalSize(psPMR, &uiPMRSize);

#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 1, 0))
	{
		DEFINE_DMA_BUF_EXPORT_INFO(sDmaBufExportInfo);

		sDmaBufExportInfo.priv  = priv;
		sDmaBufExportInfo.ops   = &sPVRDmaBufOps;
		sDmaBufExportInfo.size  = uiPMRSize;
		sDmaBufExportInfo.flags = flags;
+		sDmaBufExportInfo.resv = resv;

		psDmaBuf = dma_buf_export(&sDmaBufExportInfo);
	}
#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0))
	psDmaBuf = dma_buf_export(priv, &sPVRDmaBufOps,
-	                          uiPMRSize, flags, NULL);
+	                          uiPMRSize, flags, resv);
#else
	psDmaBuf = dma_buf_export(priv, &sPVRDmaBufOps,
	                          uiPMRSize, flags);
#endif

-	PMRSetDmaBuf(priv->psPMR, psDmaBuf);
-
	if (IS_ERR_OR_NULL(psDmaBuf))
	{
		PVR_DPF((PVR_DBG_ERROR, "%s: Failed to export buffer (err=%ld)",
		         __func__, psDmaBuf ? PTR_ERR(psDmaBuf) : -ENOMEM));
		eError = PVRSRV_ERROR_OUT_OF_MEMORY;
		goto fail_pmr_ref;
	}

	mutex_unlock(&g_HashLock);
	/* A PMR memory lay out can't change once exported
	 * This makes sure the exported and imported parties see
	 * the same layout of the memory */
	PMR_SetLayoutFixed(psPMR, IMG_TRUE);

	*ppDmaBuf = psDmaBuf;

	return PVRSRV_OK;

fail_pmr_ref:
	mutex_unlock(&g_HashLock);
	PMRUnrefPMR(psPMR);
	OSFreeMem(priv);

	PVR_ASSERT(eError != PVRSRV_OK);
	return eError;
}

struct dma_buf *
PhysmemGetDmaBuf(PMR *psPMR)
{
	PMR_DMA_BUF_DATA *psPrivData;

	psPrivData = PMRGetPrivateData(psPMR, &_sPMRDmaBufFuncTab);
	if (psPrivData)
	{
		return psPrivData->psAttachment->dmabuf;
	}
-	else
-	{
-		struct dma_buf * dma_buf = PMRGetDmaBuf(psPMR);
-		if (dma_buf)
-			return dma_buf;
-	}

	return NULL;
}

static struct dma_resv *
pmr_reservation_object_get(struct _PMR_ *pmr)
{
	struct dma_buf *dmabuf;
+	struct drm_gem_object *gobj;

	dmabuf = PhysmemGetDmaBuf(pmr);
	if (dmabuf)
		return dmabuf->resv;

+	gobj = (struct drm_gem_object*) PMRGetGemObject(pmr);
+	if (gobj)
+		return gobj->resv;

	return NULL;
}

如果函数PhysmemExportPMRToDmaBuf不添加语句sDmaBufExportInfo.resv = resv,则sDmaBufExportInfo.resv默认初始化为0(结构体成员部分显式初始化时,其余未显式初始化的成员默认为0)。

函数dma_buf_export的代码如下:

struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info)
{
	struct dma_buf *dmabuf;
	struct dma_resv *resv = exp_info->resv;
	struct file *file;
	size_t alloc_size = sizeof(struct dma_buf);
	int ret;

	if (!exp_info->resv)
		alloc_size += sizeof(struct dma_resv);
	else
		/* prevent &dma_buf[1] == dma_buf->resv */
		alloc_size += 1;

	if (WARN_ON(!exp_info->priv
			  || !exp_info->ops
			  || !exp_info->ops->map_dma_buf
			  || !exp_info->ops->unmap_dma_buf
			  || !exp_info->ops->release)) {
		return ERR_PTR(-EINVAL);
	}

	if (!try_module_get(exp_info->owner))
		return ERR_PTR(-ENOENT);

	dmabuf = kzalloc(alloc_size, GFP_KERNEL);
	if (!dmabuf) {
		ret = -ENOMEM;
		goto err_module;
	}

	dmabuf->priv = exp_info->priv;
	dmabuf->ops = exp_info->ops;
	dmabuf->size = exp_info->size;
	dmabuf->exp_name = exp_info->exp_name;
	dmabuf->owner = exp_info->owner;
	spin_lock_init(&dmabuf->name_lock);
	init_waitqueue_head(&dmabuf->poll);
	dmabuf->cb_excl.poll = dmabuf->cb_shared.poll = &dmabuf->poll;
	dmabuf->cb_excl.active = dmabuf->cb_shared.active = 0;

	if (!resv) {
		resv = (struct dma_resv *)&dmabuf[1];
		dma_resv_init(resv);
	}
	dmabuf->resv = resv;

	file = dma_buf_getfile(dmabuf, exp_info->flags);
	if (IS_ERR(file)) {
		ret = PTR_ERR(file);
		goto err_dmabuf;
	}

	file->f_mode |= FMODE_LSEEK;
	dmabuf->file = file;

	mutex_init(&dmabuf->lock);
	INIT_LIST_HEAD(&dmabuf->attachments);

	mutex_lock(&db_list.lock);
	list_add(&dmabuf->list_node, &db_list.head);
	mutex_unlock(&db_list.lock);

	return dmabuf;

err_dmabuf:
	kfree(dmabuf);
err_module:
	module_put(exp_info->owner);
	return ERR_PTR(ret);
}
EXPORT_SYMBOL_GPL(dma_buf_export);

从代码可知,当sDmaBufExportInfo.resv为0时dma_buf_export会在结构体dma_buf后面创建一个dma_resv,且dma_buf成员resv赋值为新创建的dma_resv的地址;当sDmaBufExportInfo.resv不为0时dma_buf成员resv赋值为sDmaBufExportInfo.resv。

跟踪代码调用流程发现,有如下两条路径会调用函数PhysmemExportPMRToDmaBuf:

  1. gem object(struct drm_gem_object_funcs)的成员export(被初始化为函数xdx_gem_prime_export)被调用时,最终会调用函数PhysmemExportPMRToDmaBuf。
  2. 上层应用通过ioctl调用pvr的私有ioctl时,即调用KMD的函数PVRSRVBridgePhysmemExportDmaBuf时,最终会调用函数PhysmemExportPMRToDmaBuf。

其中第1条路径,gem object转换为xdx_bo后,xdx_bo的成员psPMR、gem object的成员resv最终都会传入函数PhysmemExportPMRToDmaBuf的参数;第2条路径,上层应用传下来的是PMR handle,通过PMR handle找到psPMR(最终传入PhysmemExportPMRToDmaBuf的参数),然后根据psPMR查找相应的gem object(创建xdx_bo的时候可能会分配PMR,这时PMR的成员gobj就会被设置为相应的gem object;如果PMR是上层应用通过pvr的私有ioctl创建的,则不会设置PMR的成员gobj),如果找到了gem object就将gem object成员resv传入PhysmemExportPMRToDmaBuf的参数,否则传入NULL。

2.2 问题场景推测

根据上面分析可知,PhysmemExportPMRToDmaBuf的参数resv可能来自gem object的resv。继续查看引入问题的commit的如下修改:

struct dma_buf *
PhysmemGetDmaBuf(PMR *psPMR)
{
	PMR_DMA_BUF_DATA *psPrivData;

	psPrivData = PMRGetPrivateData(psPMR, &_sPMRDmaBufFuncTab);
	if (psPrivData)
	{
		return psPrivData->psAttachment->dmabuf;
	}
-	else
-	{
-		struct dma_buf * dma_buf = PMRGetDmaBuf(psPMR);
-		if (dma_buf)
-			return dma_buf;
-	}

	return NULL;
}

static struct dma_resv *
pmr_reservation_object_get(struct _PMR_ *pmr)
{
	struct dma_buf *dmabuf;
+	struct drm_gem_object *gobj;

	dmabuf = PhysmemGetDmaBuf(pmr);
	if (dmabuf)
		return dmabuf->resv;

+	gobj = (struct drm_gem_object*) PMRGetGemObject(pmr);
+	if (gobj)
+		return gobj->resv;

	return NULL;
}

根据修改可知,修改前函数PhysmemGetDmaBuf从PMR找到相应的dma_buf,然后pmr_reservation_object_get返回dma_buf的resv,且该resv和gem object的resv无关。修改后,函数pmr_reservation_object_get从PMR找到相应的gem object,并返回gem object的resv。以前的代码Xorg最终调用函数pmr_reservation_object_get从dma_buf获取resv,所以推测调用该函数时的参数pmr之前都是被export成dma_buf了的。

根据上面分析推测如下场景:

  1. xgltest1创建了gem object。
  2. xgltest1将gem object export成dma_buf。
  3. xgltest1将dma_buf关联的file的fd发送给Xorg,使Xorg也获取了dma_buf,此时dma_buf相关的file的引用计数会增1。详情见我的另一篇文章《Linux中进程间传递文件描述符》。
  4. Xorg最终调用pvr的函数PVRSRVDmaBufImportDevMem将dma_buf import进来从而获取pmr。
  5. xgltest1退出,xgltest1创建的gem object被释放,包括gem object的resv、_resv都被释放,但是因为第3步dma_buf相关file的引用计数增1,所以fput操作只会将引用计数减1,最终不会调用函数dma_buf_release释放dma_buf。这一步也可能发生在第4步之前。
  6. Xorg后续使用pmr时最终会调用到内核驱动KMD的函数pmr_reservation_object_get获取dma_buf的resv,修改后获取的是gem object的resv,而此时gem object已经被释放,如果使用gem object的resv则会出现use after free的问题。

修改前没问题,是因为export的dma_buf的resv是函数dma_buf_export中创建的,所以gem object释放不会影响dma_buf的resv。

根据上面的猜测,添加调试信息后有如下打印:

......
Oct 17 16:23:25 FPGA-test kernel: [   88.157457] xdx_bo_create 486 resv=0xffff8dd0f0b11af8        //xgltest1创建gem object,gem object的resv等于0xffff8dd0f0b11af8 
Oct 17 16:23:25 FPGA-test kernel: [   88.157458] CPU: 4 PID: 3687 Comm: xgltest1 Tainted: G           OE     5.4.0-42-generic #46~18.04.1-Ubuntu
Oct 17 16:23:25 FPGA-test kernel: [   88.157458] Hardware name: ASUS System Product Name/PRIME Z590-P, BIOS 1017 07/12/2021
Oct 17 16:23:25 FPGA-test kernel: [   88.157459] Call Trace:
Oct 17 16:23:25 FPGA-test kernel: [   88.157460]  dump_stack+0x6d/0x95
Oct 17 16:23:25 FPGA-test kernel: [   88.157467]  xdx_bo_create+0x19a/0x230 [xdxgpu]
Oct 17 16:23:25 FPGA-test kernel: [   88.157473]  xdx_gem_object_create+0x75/0xe0 [xdxgpu]
Oct 17 16:23:25 FPGA-test kernel: [   88.157480]  xdx_drm_dumb_create+0xa5/0x120 [xdxgpu]
Oct 17 16:23:25 FPGA-test kernel: [   88.157481]  ? unix_stream_recvmsg+0x51/0x70
Oct 17 16:23:25 FPGA-test kernel: [   88.157482]  ? __unix_insert_socket+0x40/0x40
Oct 17 16:23:25 FPGA-test kernel: [   88.157487]  ? drm_mode_create_dumb+0xa0/0xa0 [drm]
Oct 17 16:23:25 FPGA-test kernel: [   88.157494]  drm_mode_create_dumb+0x88/0xa0 [drm]
Oct 17 16:23:25 FPGA-test kernel: [   88.157498]  drm_mode_create_dumb_ioctl+0xe/0x10 [drm]
Oct 17 16:23:25 FPGA-test kernel: [   88.157502]  drm_ioctl_kernel+0xb0/0x100 [drm]
Oct 17 16:23:25 FPGA-test kernel: [   88.157506]  drm_ioctl+0x389/0x450 [drm]
Oct 17 16:23:25 FPGA-test kernel: [   88.157510]  ? drm_mode_create_dumb+0xa0/0xa0 [drm]
Oct 17 16:23:25 FPGA-test kernel: [   88.157512]  ? ___sys_recvmsg+0xa2/0xe0
Oct 17 16:23:25 FPGA-test kernel: [   88.157518]  xdx_fops_ioctl+0x59/0xb0 [xdxgpu]
Oct 17 16:23:25 FPGA-test kernel: [   88.157519]  do_vfs_ioctl+0xa9/0x640
Oct 17 16:23:25 FPGA-test kernel: [   88.157520]  ? fput+0x13/0x20
Oct 17 16:23:25 FPGA-test kernel: [   88.157521]  ? __sys_recvmsg+0x8f/0xa0
Oct 17 16:23:25 FPGA-test kernel: [   88.157522]  ksys_ioctl+0x75/0x80
Oct 17 16:23:25 FPGA-test kernel: [   88.157522]  __x64_sys_ioctl+0x1a/0x20
Oct 17 16:23:25 FPGA-test kernel: [   88.157523]  do_syscall_64+0x57/0x190
Oct 17 16:23:25 FPGA-test kernel: [   88.157524]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
Oct 17 16:23:25 FPGA-test kernel: [   88.157524] RIP: 0033:0x7fdbee7d9217
Oct 17 16:23:25 FPGA-test kernel: [   88.157525] Code: b3 66 90 48 8b 05 71 4c 2d 00 64 c7 00 26 00 00 00 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 b8 10 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 41 4c 2d 00 f7 d8 64 89 01 48
Oct 17 16:23:25 FPGA-test kernel: [   88.157525] RSP: 002b:00007ffffd1c47b8 EFLAGS: 00000206 ORIG_RAX: 0000000000000010
Oct 17 16:23:25 FPGA-test kernel: [   88.157525] RAX: ffffffffffffffda RBX: 00005617e22c9878 RCX: 00007fdbee7d9217
Oct 17 16:23:25 FPGA-test kernel: [   88.157526] RDX: 00007ffffd1c4830 RSI: 00000000c02064b2 RDI: 0000000000000004
Oct 17 16:23:25 FPGA-test kernel: [   88.157526] RBP: 00007ffffd1c47f0 R08: 00005617e1f8aac8 R09: 00007ffffd1c4ae0
Oct 17 16:23:25 FPGA-test kernel: [   88.157526] R10: 0000000000000000 R11: 0000000000000206 R12: 00005617e011e700
Oct 17 16:23:25 FPGA-test kernel: [   88.157526] R13: 00007ffffd1c57b0 R14: 0000000000000000 R15: 0000000000000000
Oct 17 16:23:25 FPGA-test kernel: [   88.157529] PhysmemExportPMRToDmaBuf 1018 resv=0xffff8dd0f0b11af8 sDmaBufExportInfo.resv=0xffff8dd0f0b11af8    //xgltest1 export该gem object为dma_buf
........
Oct 17 16:23:27 FPGA-test kernel: [   89.452391] xdx_gem_object_free 130: obj->resv=0xffff8dd0f0b11af8 psPMR=0xffff8dd0d2fa1800        //xgltest1释放gem object,且其resv指针等于0xffff8dd0f0b11af8
Oct 17 16:23:27 FPGA-test kernel: [   89.452392] CPU: 6 PID: 3687 Comm: xgltest1 Tainted: G           OE     5.4.0-42-generic #46~18.04.1-Ubuntu
Oct 17 16:23:27 FPGA-test kernel: [   89.452392] Hardware name: ASUS System Product Name/PRIME Z590-P, BIOS 1017 07/12/2021
Oct 17 16:23:27 FPGA-test kernel: [   89.452392] Call Trace:
Oct 17 16:23:27 FPGA-test kernel: [   89.452393]  dump_stack+0x6d/0x95
Oct 17 16:23:27 FPGA-test kernel: [   89.452399]  xdx_gem_object_free+0xbd/0xd0 [xdxgpu]
Oct 17 16:23:27 FPGA-test kernel: [   89.452403]  drm_gem_object_free+0x23/0x80 [drm]
Oct 17 16:23:27 FPGA-test kernel: [   89.452408]  drm_gem_object_put_unlocked+0x53/0x70 [drm]
Oct 17 16:23:27 FPGA-test kernel: [   89.452412]  drm_gem_object_handle_put_unlocked+0x6a/0xb0 [drm]
Oct 17 16:23:27 FPGA-test kernel: [   89.452416]  drm_gem_object_release_handle+0x74/0xa0 [drm]
Oct 17 16:23:27 FPGA-test kernel: [   89.452420]  ? drm_gem_object_handle_put_unlocked+0xb0/0xb0 [drm]
Oct 17 16:23:27 FPGA-test kernel: [   89.452422]  idr_for_each+0x4c/0xd0
Oct 17 16:23:27 FPGA-test kernel: [   89.452426]  drm_gem_release+0x20/0x30 [drm]
Oct 17 16:23:27 FPGA-test kernel: [   89.452431]  drm_file_free.part.11+0x215/0x260 [drm]
Oct 17 16:23:27 FPGA-test kernel: [   89.452435]  drm_release+0x9e/0xd0 [drm]
Oct 17 16:23:27 FPGA-test kernel: [   89.452443]  xdx_fops_release+0xe/0x10 [xdxgpu]
Oct 17 16:23:27 FPGA-test kernel: [   89.452445]  __fput+0xc6/0x260
Oct 17 16:23:27 FPGA-test kernel: [   89.452446]  ____fput+0xe/0x10
Oct 17 16:23:27 FPGA-test kernel: [   89.452447]  task_work_run+0x9d/0xc0
Oct 17 16:23:27 FPGA-test kernel: [   89.452449]  do_exit+0x379/0xb80
Oct 17 16:23:27 FPGA-test kernel: [   89.452451]  do_group_exit+0x43/0xa0
Oct 17 16:23:27 FPGA-test kernel: [   89.452452]  get_signal+0x14f/0x860
Oct 17 16:23:27 FPGA-test kernel: [   89.452453]  do_signal+0x34/0x6d0
Oct 17 16:23:27 FPGA-test kernel: [   89.452454]  ? fput+0x13/0x20
Oct 17 16:23:27 FPGA-test kernel: [   89.452454]  ? __sys_recvmsg+0x8f/0xa0
Oct 17 16:23:27 FPGA-test kernel: [   89.452455]  exit_to_usermode_loop+0x90/0x130
Oct 17 16:23:27 FPGA-test kernel: [   89.452456]  do_syscall_64+0x170/0x190
Oct 17 16:23:27 FPGA-test kernel: [   89.452457]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
Oct 17 16:23:27 FPGA-test kernel: [   89.452457] RIP: 0033:0x7fdbee7d7bb9
.......
Oct 17 16:23:27 FPGA-test kernel: [   89.515217] pmr_reservation_object_get 98 resv=0xffff8dd0fad22af8
Oct 17 16:23:27 FPGA-test kernel: [   89.515220] CPU: 5 PID: 3677 Comm: Xorg Tainted: G           OE     5.4.0-42-generic #46~18.04.1-Ubuntu
Oct 17 16:23:27 FPGA-test kernel: [   89.515221] Hardware name: ASUS System Product Name/PRIME Z590-P, BIOS 1017 07/12/2021
Oct 17 16:23:27 FPGA-test kernel: [   89.515222] Call Trace:
Oct 17 16:23:27 FPGA-test kernel: [   89.515226]  dump_stack+0x6d/0x95
Oct 17 16:23:27 FPGA-test kernel: [   89.515272]  pmr_reservation_object_get+0x5a/0x70 [xdxgpu]
Oct 17 16:23:27 FPGA-test kernel: [   89.515318]  pvr_buffer_sync_resolve_and_create_fences+0x33b/0x800 [xdxgpu]
Oct 17 16:23:27 FPGA-test kernel: [   89.515386]  PVRSRVRGXKickTA3DKM+0x846/0x27a0 [xdxgpu]
Oct 17 16:23:27 FPGA-test kernel: [   89.515399]  ? put_dec+0x18/0xa0
Oct 17 16:23:27 FPGA-test kernel: [   89.515407]  ? vsnprintf+0x226/0x510
Oct 17 16:23:27 FPGA-test kernel: [   89.515412]  ? va_format.isra.14+0x70/0xa0
Oct 17 16:23:27 FPGA-test kernel: [   89.515476]  PVRSRVBridgeRGXKickTA3D2+0x10d3/0x1100 [xdxgpu]
Oct 17 16:23:27 FPGA-test kernel: [   89.515537]  BridgedDispatchKM+0x1e3/0x6b0 [xdxgpu]
Oct 17 16:23:27 FPGA-test kernel: [   89.515597]  PVRSRV_BridgeDispatchKM+0xc8/0x180 [xdxgpu]
Oct 17 16:23:27 FPGA-test kernel: [   89.515653]  ? PVRSRVDriverThreadExit+0x30/0x30 [xdxgpu]
Oct 17 16:23:27 FPGA-test kernel: [   89.515682]  drm_ioctl_kernel+0xb0/0x100 [drm]
Oct 17 16:23:27 FPGA-test kernel: [   89.515706]  drm_ioctl+0x389/0x450 [drm]
Oct 17 16:23:27 FPGA-test kernel: [   89.515761]  ? PVRSRVDriverThreadExit+0x30/0x30 [xdxgpu]
Oct 17 16:23:27 FPGA-test kernel: [   89.515773]  ? __wake_up_common_lock+0x8c/0xc0
Oct 17 16:23:27 FPGA-test kernel: [   89.515817]  xdx_fops_ioctl+0x59/0xb0 [xdxgpu]
Oct 17 16:23:27 FPGA-test kernel: [   89.515830]  do_vfs_ioctl+0xa9/0x640
Oct 17 16:23:27 FPGA-test kernel: [   89.515837]  ? vfs_write+0x16a/0x1a0
Oct 17 16:23:27 FPGA-test kernel: [   89.515842]  ksys_ioctl+0x75/0x80
Oct 17 16:23:27 FPGA-test kernel: [   89.515847]  __x64_sys_ioctl+0x1a/0x20
Oct 17 16:23:27 FPGA-test kernel: [   89.515851]  do_syscall_64+0x57/0x190
Oct 17 16:23:27 FPGA-test kernel: [   89.515855]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
Oct 17 16:23:27 FPGA-test kernel: [   89.515858] RIP: 0033:0x7efc7a63a217
Oct 17 16:23:27 FPGA-test kernel: [   89.515861] Code: b3 66 90 48 8b 05 71 4c 2d 00 64 c7 00 26 00 00 00 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 b8 10 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 41 4c 2d 00 f7 d8 64 89 01 48
Oct 17 16:23:27 FPGA-test kernel: [   89.515863] RSP: 002b:00007ffff67e5708 EFLAGS: 00000286 ORIG_RAX: 0000000000000010
Oct 17 16:23:27 FPGA-test kernel: [   89.515865] RAX: ffffffffffffffda RBX: 000000000000004d RCX: 00007efc7a63a217
Oct 17 16:23:27 FPGA-test kernel: [   89.515867] RDX: 00007ffff67e5750 RSI: 00000000c0206440 RDI: 000000000000000f
Oct 17 16:23:27 FPGA-test kernel: [   89.515868] RBP: 00007ffff67e5780 R08: 00007efc75b025d0 R09: 0000000000000082
Oct 17 16:23:27 FPGA-test kernel: [   89.515870] R10: 0000555feaab96e0 R11: 0000000000000286 R12: 0000000000000000
Oct 17 16:23:27 FPGA-test kernel: [   89.515871] R13: 00000000000000e7 R14: 000000000000000c R15: 00000000ffffffff
Oct 17 16:23:27 FPGA-test kernel: [   89.515875] pmr_reservation_object_get 98 resv=0xffff8dd0f0b11af8                     //Xorg获取并使用已经被释放的resv
Oct 17 16:23:27 FPGA-test kernel: [   89.515878] CPU: 5 PID: 3677 Comm: Xorg Tainted: G           OE     5.4.0-42-generic #46~18.04.1-Ubuntu
Oct 17 16:23:27 FPGA-test kernel: [   89.515879] Hardware name: ASUS System Product Name/PRIME Z590-P, BIOS 1017 07/12/2021
Oct 17 16:23:27 FPGA-test kernel: [   89.515880] Call Trace:
Oct 17 16:23:27 FPGA-test kernel: [   89.515884]  dump_stack+0x6d/0x95
Oct 17 16:23:27 FPGA-test kernel: [   89.515932]  pmr_reservation_object_get+0x5a/0x70 [xdxgpu]
Oct 17 16:23:27 FPGA-test kernel: [   89.515978]  pvr_buffer_sync_resolve_and_create_fences+0x33b/0x800 [xdxgpu]
Oct 17 16:23:27 FPGA-test kernel: [   89.516041]  PVRSRVRGXKickTA3DKM+0x846/0x27a0 [xdxgpu]
Oct 17 16:23:27 FPGA-test kernel: [   89.516054]  ? put_dec+0x18/0xa0
Oct 17 16:23:27 FPGA-test kernel: [   89.516063]  ? vsnprintf+0x226/0x510
Oct 17 16:23:27 FPGA-test kernel: [   89.516069]  ? va_format.isra.14+0x70/0xa0
Oct 17 16:23:27 FPGA-test kernel: [   89.516133]  PVRSRVBridgeRGXKickTA3D2+0x10d3/0x1100 [xdxgpu]
Oct 17 16:23:27 FPGA-test kernel: [   89.516195]  BridgedDispatchKM+0x1e3/0x6b0 [xdxgpu]
Oct 17 16:23:27 FPGA-test kernel: [   89.516252]  PVRSRV_BridgeDispatchKM+0xc8/0x180 [xdxgpu]
Oct 17 16:23:27 FPGA-test kernel: [   89.516308]  ? PVRSRVDriverThreadExit+0x30/0x30 [xdxgpu]
Oct 17 16:23:27 FPGA-test kernel: [   89.516339]  drm_ioctl_kernel+0xb0/0x100 [drm]
Oct 17 16:23:27 FPGA-test kernel: [   89.516362]  drm_ioctl+0x389/0x450 [drm]
Oct 17 16:23:27 FPGA-test kernel: [   89.516415]  ? PVRSRVDriverThreadExit+0x30/0x30 [xdxgpu]
Oct 17 16:23:27 FPGA-test kernel: [   89.516426]  ? __wake_up_common_lock+0x8c/0xc0
Oct 17 16:23:27 FPGA-test kernel: [   89.516468]  xdx_fops_ioctl+0x59/0xb0 [xdxgpu]
Oct 17 16:23:27 FPGA-test kernel: [   89.516480]  do_vfs_ioctl+0xa9/0x640
Oct 17 16:23:27 FPGA-test kernel: [   89.516486]  ? vfs_write+0x16a/0x1a0
Oct 17 16:23:27 FPGA-test kernel: [   89.516491]  ksys_ioctl+0x75/0x80
Oct 17 16:23:27 FPGA-test kernel: [   89.516496]  __x64_sys_ioctl+0x1a/0x20
Oct 17 16:23:27 FPGA-test kernel: [   89.516500]  do_syscall_64+0x57/0x190
Oct 17 16:23:27 FPGA-test kernel: [   89.516503]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
Oct 17 16:23:27 FPGA-test kernel: [   89.516505] RIP: 0033:0x7efc7a63a217
Oct 17 16:23:27 FPGA-test kernel: [   89.516508] Code: b3 66 90 48 8b 05 71 4c 2d 00 64 c7 00 26 00 00 00 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 b8 10 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 41 4c 2d 00 f7 d8 64 89 01 48
Oct 17 16:23:27 FPGA-test kernel: [   89.516510] RSP: 002b:00007ffff67e5708 EFLAGS: 00000286 ORIG_RAX: 0000000000000010
Oct 17 16:23:27 FPGA-test kernel: [   89.516514] RAX: ffffffffffffffda RBX: 000000000000004d RCX: 00007efc7a63a217
Oct 17 16:23:27 FPGA-test kernel: [   89.516516] RDX: 00007ffff67e5750 RSI: 00000000c0206440 RDI: 000000000000000f
Oct 17 16:23:27 FPGA-test kernel: [   89.516517] RBP: 00007ffff67e5780 R08: 00007efc75b025d0 R09: 0000000000000082
Oct 17 16:23:27 FPGA-test kernel: [   89.516519] R10: 0000555feaab96e0 R11: 0000000000000286 R12: 0000000000000000
Oct 17 16:23:27 FPGA-test kernel: [   89.516520] R13: 00000000000000e7 R14: 000000000000000c R15: 00000000ffffffff
Oct 17 16:23:27 FPGA-test kernel: [   89.516555] PVR_K:(Error):  3677: SyncCheckpointGetFirmwareAddr called for psSyncCheckpoint<00000000c6c70bb6>, but ui32ValidationCheck=0x2b2bb [sync_checkpoint.c:1745]  //紧接着立即报错
Oct 17 16:23:27 FPGA-test kernel: [   89.516562] PVR_K:(Fatal):  3677: Debug assertion failed! [1324]
Oct 17 16:23:27 FPGA-test kernel: [   89.516587] ------------[ cut here ]------------
Oct 17 16:23:27 FPGA-test kernel: [   89.516647] WARNING: CPU: 5 PID: 3677 at /workdata/daoyu/src/xdxgpu-1.17rtm/drivers/kernel/xdxkmd/xdxgpu/gfx/services/server/common/sync_checkpoint.c:1324 SyncCheckpointUnref+0x1c8/0x1f0 [xdxgpu]
Oct 17 16:23:27 FPGA-test kernel: [   89.516657] Modules linked in: xdxgpu(OE) drm_kms_helper drm fb_sys_fops syscopyarea sysfillrect sysimgblt ipmi_devintf ipmi_msghandler rpcsec_gss_krb5 auth_rpcgss nfsv4 nfs lockd grace fscache snd_hda_codec_hdmi binfmt_misc x86_pkg_temp_thermal intel_powerclamp coretemp kvm nls_iso8859_1 snd_hda_codec_generic ledtrig_audio snd_hda_intel snd_intel_dspcfg snd_hda_codec snd_hda_core snd_hwdep snd_pcm crct10dif_pclmul crc32_pclmul ghash_clmulni_intel snd_seq_midi snd_seq_midi_event snd_rawmidi snd_seq snd_seq_device snd_timer aesni_intel crypto_simd eeepc_wmi snd cryptd asus_wmi joydev input_leds glue_helper sparse_keymap wmi_bmof soundcore mac_hid acpi_tad acpi_pad sch_fq_codel parport_pc ppdev lp parport sunrpc ip_tables x_tables autofs4 hid_generic usbhid hid ahci r8125(OE) libahci wmi video
Oct 17 16:23:27 FPGA-test kernel: [   89.516708] CPU: 5 PID: 3677 Comm: Xorg Tainted: G           OE     5.4.0-42-generic #46~18.04.1-Ubuntu
Oct 17 16:23:27 FPGA-test kernel: [   89.516710] Hardware name: ASUS System Product Name/PRIME Z590-P, BIOS 1017 07/12/2021
Oct 17 16:23:27 FPGA-test kernel: [   89.516762] RIP: 0010:SyncCheckpointUnref+0x1c8/0x1f0 [xdxgpu]
Oct 17 16:23:27 FPGA-test kernel: [   89.516765] Code: 48 89 df e8 6a eb ff ff e9 a1 fe ff ff 48 c7 c1 3d ca b9 c0 ba 2c 05 00 00 48 c7 c6 a0 90 b6 c0 bf 01 00 00 00 e8 a8 9e 06 00 <0f> 0b e9 63 fe ff ff 49 8b bd 68 01 00 00 44 8b 83 80 00 00 00 31
Oct 17 16:23:27 FPGA-test kernel: [   89.516769] RSP: 0018:ffffada4c40ff5c0 EFLAGS: 00010282
Oct 17 16:23:27 FPGA-test kernel: [   89.516772] RAX: 0000000000000000 RBX: ffff8dd0f206d8c0 RCX: 0000000000000006
Oct 17 16:23:27 FPGA-test kernel: [   89.516774] RDX: 0000000000000000 RSI: 0000000000000202 RDI: 0000000000000202
Oct 17 16:23:27 FPGA-test kernel: [   89.516776] RBP: ffffada4c40ff5d8 R08: 000000000000552c R09: ffffffff8efbd4fc
Oct 17 16:23:27 FPGA-test kernel: [   89.516777] R10: 0000000000000002 R11: ffffada4c40ff3a0 R12: ffff8dd0d35b8100
Oct 17 16:23:27 FPGA-test kernel: [   89.516779] R13: ffff8dd0f089a000 R14: ffff8dd0d2ff2844 R15: ffff8dd0f4c2d810
Oct 17 16:23:27 FPGA-test kernel: [   89.516782] FS:  00007efc7ca59d00(0000) GS:ffff8dd0ff340000(0000) knlGS:0000000000000000
Oct 17 16:23:27 FPGA-test kernel: [   89.516784] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
Oct 17 16:23:27 FPGA-test kernel: [   89.516786] CR2: 00007f7b14282000 CR3: 00000008321e4003 CR4: 0000000000760ee0
Oct 17 16:23:27 FPGA-test kernel: [   89.516788] PKRU: 55555554
Oct 17 16:23:27 FPGA-test kernel: [   89.516789] Call Trace:
Oct 17 16:23:27 FPGA-test kernel: [   89.516842]  SyncCheckpointDropRef+0x13/0x20 [xdxgpu]
Oct 17 16:23:27 FPGA-test kernel: [   89.516894]  _AppendCheckpoints+0x8d/0x110 [xdxgpu]
Oct 17 16:23:27 FPGA-test kernel: [   89.516946]  SyncAddrListAppendAndDeRefCheckpoints+0x13/0x20 [xdxgpu]
Oct 17 16:23:27 FPGA-test kernel: [   89.517012]  PVRSRVRGXKickTA3DKM+0x22e1/0x27a0 [xdxgpu]
Oct 17 16:23:27 FPGA-test kernel: [   89.517021]  ? put_dec+0x18/0xa0
Oct 17 16:23:27 FPGA-test kernel: [   89.517026]  ? vsnprintf+0x226/0x510
Oct 17 16:23:27 FPGA-test kernel: [   89.517030]  ? va_format.isra.14+0x70/0xa0
Oct 17 16:23:27 FPGA-test kernel: [   89.517093]  PVRSRVBridgeRGXKickTA3D2+0x10d3/0x1100 [xdxgpu]
Oct 17 16:23:27 FPGA-test kernel: [   89.517150]  BridgedDispatchKM+0x1e3/0x6b0 [xdxgpu]
Oct 17 16:23:27 FPGA-test kernel: [   89.517207]  PVRSRV_BridgeDispatchKM+0xc8/0x180 [xdxgpu]
Oct 17 16:23:27 FPGA-test kernel: [   89.517258]  ? PVRSRVDriverThreadExit+0x30/0x30 [xdxgpu]
Oct 17 16:23:27 FPGA-test kernel: [   89.517283]  drm_ioctl_kernel+0xb0/0x100 [drm]
Oct 17 16:23:27 FPGA-test kernel: [   89.517307]  drm_ioctl+0x389/0x450 [drm]
Oct 17 16:23:27 FPGA-test kernel: [   89.517357]  ? PVRSRVDriverThreadExit+0x30/0x30 [xdxgpu]
Oct 17 16:23:27 FPGA-test kernel: [   89.517363]  ? __wake_up_common_lock+0x8c/0xc0
Oct 17 16:23:27 FPGA-test kernel: [   89.517407]  xdx_fops_ioctl+0x59/0xb0 [xdxgpu]
Oct 17 16:23:27 FPGA-test kernel: [   89.517413]  do_vfs_ioctl+0xa9/0x640
Oct 17 16:23:27 FPGA-test kernel: [   89.517418]  ? vfs_write+0x16a/0x1a0
Oct 17 16:23:27 FPGA-test kernel: [   89.517423]  ksys_ioctl+0x75/0x80
Oct 17 16:23:27 FPGA-test kernel: [   89.517428]  __x64_sys_ioctl+0x1a/0x20
Oct 17 16:23:27 FPGA-test kernel: [   89.517432]  do_syscall_64+0x57/0x190
Oct 17 16:23:27 FPGA-test kernel: [   89.517435]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
Oct 17 16:23:27 FPGA-test kernel: [   89.517437] RIP: 0033:0x7efc7a63a217
Oct 17 16:23:27 FPGA-test kernel: [   89.517440] Code: b3 66 90 48 8b 05 71 4c 2d 00 64 c7 00 26 00 00 00 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 b8 10 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 41 4c 2d 00 f7 d8 64 89 01 48
Oct 17 16:23:27 FPGA-test kernel: [   89.517442] RSP: 002b:00007ffff67e5708 EFLAGS: 00000286 ORIG_RAX: 0000000000000010
Oct 17 16:23:27 FPGA-test kernel: [   89.517444] RAX: ffffffffffffffda RBX: 000000000000004d RCX: 00007efc7a63a217
Oct 17 16:23:27 FPGA-test kernel: [   89.517446] RDX: 00007ffff67e5750 RSI: 00000000c0206440 RDI: 000000000000000f
Oct 17 16:23:27 FPGA-test kernel: [   89.517447] RBP: 00007ffff67e5780 R08: 00007efc75b025d0 R09: 0000000000000082
Oct 17 16:23:27 FPGA-test kernel: [   89.517449] R10: 0000555feaab96e0 R11: 0000000000000286 R12: 0000000000000000
Oct 17 16:23:27 FPGA-test kernel: [   89.517451] R13: 00000000000000e7 R14: 000000000000000c R15: 00000000ffffffff
Oct 17 16:23:27 FPGA-test kernel: [   89.517456] ---[ end trace 56695c4850d11180 ]---
Oct 17 16:23:27 FPGA-test kernel: [   89.517507] BUG: kernel NULL pointer dereference, address: 0000000000000000
Oct 17 16:23:27 FPGA-test kernel: [   89.517511] #PF: supervisor read access in kernel mode
Oct 17 16:23:27 FPGA-test kernel: [   89.517516] #PF: error_code(0x0000) - not-present page
Oct 17 16:23:27 FPGA-test kernel: [   89.517519] PGD 0 P4D 0 
Oct 17 16:23:27 FPGA-test kernel: [   89.517525] Oops: 0000 [#1] SMP NOPTI
Oct 17 16:23:27 FPGA-test kernel: [   89.517529] CPU: 5 PID: 3677 Comm: Xorg Tainted: G        W  OE     5.4.0-42-generic #46~18.04.1-Ubuntu
Oct 17 16:23:27 FPGA-test kernel: [   89.517531] Hardware name: ASUS System Product Name/PRIME Z590-P, BIOS 1017 07/12/2021
Oct 17 16:23:27 FPGA-test kernel: [   89.517593] RIP: 0010:RGXCmdHelperAcquireCmdCCB+0x265/0x370 [xdxgpu]
Oct 17 16:23:27 FPGA-test kernel: [   89.517597] Code: 48 83 c0 08 39 53 14 48 89 45 e0 0f 86 73 fe ff ff 48 8b 73 18 89 d1 8b 0c 8e 89 08 83 e1 01 75 d1 48 8b 4b 20 89 fe 83 c7 01 <8b> 0c b1 89 48 04 eb c7 b8 68 01 00 00 48 8b 7d e8 65 48 33 3c 25
Oct 17 16:23:27 FPGA-test kernel: [   89.517601] RSP: 0018:ffffada4c40ff618 EFLAGS: 00010202
Oct 17 16:23:27 FPGA-test kernel: [   89.517605] RAX: ffffada4c21b15e8 RBX: ffff8dd0f77dc808 RCX: 0000000000000000
Oct 17 16:23:27 FPGA-test kernel: [   89.517607] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 0000000000000001
Oct 17 16:23:27 FPGA-test kernel: [   89.517610] RBP: ffffada4c40ff640 R08: 0000000000000002 R09: 0000000000007fff
Oct 17 16:23:27 FPGA-test kernel: [   89.517612] R10: 0000000000000000 R11: 0000000000000002 R12: ffff8dd0f77dc898
Oct 17 16:23:27 FPGA-test kernel: [   89.517615] R13: ffff8dd0f22d77e0 R14: ffffffffc0ba58d2 R15: 0000000000000000
Oct 17 16:23:27 FPGA-test kernel: [   89.517618] FS:  00007efc7ca59d00(0000) GS:ffff8dd0ff340000(0000) knlGS:0000000000000000
Oct 17 16:23:27 FPGA-test kernel: [   89.517621] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
Oct 17 16:23:27 FPGA-test kernel: [   89.517623] CR2: 0000000000000000 CR3: 00000008321e4003 CR4: 0000000000760ee0
Oct 17 16:23:27 FPGA-test kernel: [   89.517625] PKRU: 55555554
Oct 17 16:23:27 FPGA-test kernel: [   89.517628] Call Trace:
Oct 17 16:23:27 FPGA-test kernel: [   89.517694]  PVRSRVRGXKickTA3DKM+0x18c0/0x27a0 [xdxgpu]
Oct 17 16:23:27 FPGA-test kernel: [   89.517703]  ? put_dec+0x18/0xa0
Oct 17 16:23:27 FPGA-test kernel: [   89.517708]  ? vsnprintf+0x226/0x510
Oct 17 16:23:27 FPGA-test kernel: [   89.517713]  ? va_format.isra.14+0x70/0xa0
Oct 17 16:23:27 FPGA-test kernel: [   89.517778]  PVRSRVBridgeRGXKickTA3D2+0x10d3/0x1100 [xdxgpu]
Oct 17 16:23:27 FPGA-test kernel: [   89.517836]  BridgedDispatchKM+0x1e3/0x6b0 [xdxgpu]
Oct 17 16:23:27 FPGA-test kernel: [   89.517891]  PVRSRV_BridgeDispatchKM+0xc8/0x180 [xdxgpu]
Oct 17 16:23:27 FPGA-test kernel: [   89.517944]  ? PVRSRVDriverThreadExit+0x30/0x30 [xdxgpu]
Oct 17 16:23:27 FPGA-test kernel: [   89.517971]  drm_ioctl_kernel+0xb0/0x100 [drm]
Oct 17 16:23:27 FPGA-test kernel: [   89.517996]  drm_ioctl+0x389/0x450 [drm]
Oct 17 16:23:27 FPGA-test kernel: [   89.518047]  ? PVRSRVDriverThreadExit+0x30/0x30 [xdxgpu]
Oct 17 16:23:27 FPGA-test kernel: [   89.518055]  ? __wake_up_common_lock+0x8c/0xc0
Oct 17 16:23:27 FPGA-test kernel: [   89.518096]  xdx_fops_ioctl+0x59/0xb0 [xdxgpu]
Oct 17 16:23:27 FPGA-test kernel: [   89.518104]  do_vfs_ioctl+0xa9/0x640
Oct 17 16:23:27 FPGA-test kernel: [   89.518109]  ? vfs_write+0x16a/0x1a0
Oct 17 16:23:27 FPGA-test kernel: [   89.518114]  ksys_ioctl+0x75/0x80
Oct 17 16:23:27 FPGA-test kernel: [   89.518119]  __x64_sys_ioctl+0x1a/0x20
Oct 17 16:23:27 FPGA-test kernel: [   89.518125]  do_syscall_64+0x57/0x190
Oct 17 16:23:27 FPGA-test kernel: [   89.518129]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
Oct 17 16:23:27 FPGA-test kernel: [   89.518132] RIP: 0033:0x7efc7a63a217
Oct 17 16:23:27 FPGA-test kernel: [   89.518135] Code: b3 66 90 48 8b 05 71 4c 2d 00 64 c7 00 26 00 00 00 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 b8 10 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 41 4c 2d 00 f7 d8 64 89 01 48
Oct 17 16:23:27 FPGA-test kernel: [   89.518140] RSP: 002b:00007ffff67e5708 EFLAGS: 00000286 ORIG_RAX: 0000000000000010
Oct 17 16:23:27 FPGA-test kernel: [   89.518144] RAX: ffffffffffffffda RBX: 000000000000004d RCX: 00007efc7a63a217
Oct 17 16:23:27 FPGA-test kernel: [   89.518146] RDX: 00007ffff67e5750 RSI: 00000000c0206440 RDI: 000000000000000f
Oct 17 16:23:27 FPGA-test kernel: [   89.518149] RBP: 00007ffff67e5780 R08: 00007efc75b025d0 R09: 0000000000000082
Oct 17 16:23:27 FPGA-test kernel: [   89.518151] R10: 0000555feaab96e0 R11: 0000000000000286 R12: 0000000000000000
Oct 17 16:23:27 FPGA-test kernel: [   89.518154] R13: 00000000000000e7 R14: 000000000000000c R15: 00000000ffffffff

从日志可以发现:

  1. xgltest1创建了gem object,gem object的resv等于0xffff8dd0f0b11af8。
  2. xgltest1 export该gem object为dma_buf。
  3. xgltest1退出时,会释放多个gem object,其中一个gem object的resv指针值为0xffff8dd0f0b11af8。
  4. Xorg获取到的是已经被释放的gem object的resv。
  5. sync checkpoint校验报错,随后出现NULL pointer dereference,导致kernel crash。

根据日志验证了上面的猜测。

xgltest1可能是通过libxcb的如下函数,把dma_buf对应的file的fd发送给Xorg的(即通过sendmsg携带SCM_RIGHTS类型的控制消息来传递fd):

libxcb\libxcb-1.14\source\src\xcb_conn.c

static int write_vec(xcb_connection_t *c, struct iovec **vector, int *count)
{
    int n;
    assert(!c->out.queue_len);

.......
    n = *count;
    if (n > IOV_MAX)
        n = IOV_MAX;

#if HAVE_SENDMSG
    if (c->out.out_fd.nfd) {
        union {
            struct cmsghdr cmsghdr;
            char buf[CMSG_SPACE(XCB_MAX_PASS_FD * sizeof(int))];
        } cmsgbuf;
        struct msghdr msg = {
            .msg_name = NULL,
            .msg_namelen = 0,
            .msg_iov = *vector,
            .msg_iovlen = n,
            .msg_control = cmsgbuf.buf,
            .msg_controllen = CMSG_LEN(c->out.out_fd.nfd * sizeof (int)),
        };
        int i;
        struct cmsghdr *hdr = CMSG_FIRSTHDR(&msg);

        hdr->cmsg_len = msg.msg_controllen;
        hdr->cmsg_level = SOL_SOCKET;
        hdr->cmsg_type = SCM_RIGHTS;   //表示发送fd
        memcpy(CMSG_DATA(hdr), c->out.out_fd.fd, c->out.out_fd.nfd * sizeof (int));

        n = sendmsg(c->fd, &msg, 0);    //发送fd
        if(n < 0 && errno == EAGAIN)
            return 1;
        for (i = 0; i < c->out.out_fd.nfd; i++)
            close(c->out.out_fd.fd[i]);
        c->out.out_fd.nfd = 0;
    } else
#endif
    {
        n = writev(c->fd, *vector, n);
        if(n < 0 && errno == EAGAIN)
            return 1;
    }

.......
}

三、修改方法

根据上面的分析可知,只要在export dma_buf时增加gem object的引用计数,并在dma_buf被release时减少gem object的引用计数,就可以解决这个问题。这样一来,gem object最后可能是由Xorg释放的:Xorg关闭dma_buf对应的file,file的引用计数减1后变为0,最终调用dma_buf_release -> dmabuf->ops->release(dmabuf),即函数PVRDmaBufOpsRelease。修改如下(以"+"开头的行为新增代码):

static const struct dma_buf_ops sPVRDmaBufOps =
{
	.attach        = PVRDmaBufOpsAttach,
	.detach		   = PVRDmaBufOpsDetach,
	.map_dma_buf   = PVRDmaBufOpsMap,
	.unmap_dma_buf = PVRDmaBufOpsUnmap,
	.release       = PVRDmaBufOpsRelease,
......
};

static void PVRDmaBufOpsRelease(struct dma_buf *psDmaBuf)
{
	PVRDmaBufPrivData *priv = psDmaBuf->priv;
	PMR *psPMR = priv->psPMR;
+	struct drm_gem_object *gobj;

	PMRUnrefPMR(psPMR);

+	gobj = (struct drm_gem_object*) PMRGetGemObject(psPMR);
+	if (gobj)
+		drm_gem_object_put_unlocked(gobj);

	OSFreeMem(priv);
}


PhysmemExportPMRToDmaBuf(PVRSRV_DEVICE_NODE *psDevNode, PMR *psPMR,
	    struct dma_resv *resv, int flags,
                    struct dma_buf **ppDmaBuf)
{
	IMG_DEVMEM_SIZE_T uiPMRSize;
	PVRSRV_ERROR eError;
	PVRDmaBufPrivData *priv;
+	struct drm_gem_object *gobj;

	priv = OSAllocZMem(sizeof(*priv));
	if (priv == NULL)
		return PVRSRV_ERROR_OUT_OF_MEMORY;

	priv->psPMR = psPMR;

	mutex_lock(&g_HashLock);

	PMRRefPMR(psPMR);

+	gobj = (struct drm_gem_object*) PMRGetGemObject(psPMR);
+	if (gobj)
+		drm_gem_object_get(gobj);

	PMR_LogicalSize(psPMR, &uiPMRSize);

.......
fail_pmr_ref:
	mutex_unlock(&g_HashLock);

+	if (gobj)
+		drm_gem_object_put_unlocked(gobj);

	PMRUnrefPMR(psPMR);
	OSFreeMem(priv);
.......
}

修改后经验证,该问题不再出现,并且gem object最终由Xorg释放(其中一部分是在Xorg退出时才释放的)。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值