一.首先提出两个问题
1.为什么要拷贝
2.为什么不建议使用memcpy复制
二.为什么要拷贝?
先看一下正常的使用示例
struct globalmem_dev {
struct cdev cdev;
unsigned char mem[GLOBALMEM_SIZE];//长期有效
};
static ssize_t globalmem_read(struct file *filp, char __user * buf, size_t size,
loff_t * ppos)
{
unsigned long p = *ppos;
unsigned int count = size;
int ret = 0;
struct globalmem_dev *dev = filp->private_data;
if (p >= GLOBALMEM_SIZE)
return 0;
if (count > GLOBALMEM_SIZE - p)
count = GLOBALMEM_SIZE - p;
if (copy_to_user(buf, dev->mem + p, count)) {
ret = -EFAULT;
} else {
*ppos += count;
ret = count;
printk(KERN_INFO "read %u bytes(s) from %lu\n", count, p);
}
return ret;
}
static ssize_t globalmem_write(struct file *filp, const char __user * buf,
size_t size, loff_t * ppos)
{
unsigned long p = *ppos;
unsigned int count = size;
int ret = 0;
struct globalmem_dev *dev = filp->private_data;
if (p >= GLOBALMEM_SIZE)
return 0;
if (count > GLOBALMEM_SIZE - p)
count = GLOBALMEM_SIZE - p;
if (copy_from_user(dev->mem + p, buf, count))
ret = -EFAULT;
else {
*ppos += count;
ret = count;
printk(KERN_INFO "written %u bytes(s) from %lu\n", count, p);
}
return ret;
}
1.内核直接访问用户虚拟地址可能是不被允许的
2.buf的虚拟地址旨在该进程1有效;如果切到其他进程2,页表变了,这个buf的虚拟地址是个未知状态,进程2再去读dev->mem就会引发错误
static ssize_t globalmem_write(struct file *filp, const char __user * buf,
size_t size, loff_t * ppos)
{
struct globalmem_dev *dev = filp->private_data;
dev->mem = buf; /**/
return ret;
}
三.为什么不直接用memcpy?
1.内核直接访问用户虚拟地址可能是不被允许的
2.copy_from_user()有自带的access_ok检查,如果用户传进来的buffer不属于用户空间而是内核空间,根本不会拷贝
3.copy_from_user()有自带的page fault后exception修复机制;假设用户程序随便胡乱传个用户态的地址给内核,虽然access_ok是没有问题的-但是这个地址,根本什么有效的数据;如果内核驱动用memcpy就会发生内核Oops
四.内核怎么禁止访问用户虚拟地址的?
CONFIG_ARM64_PAN
PAN (Privileged Access Never)它可以把内核对用户空间的buffer访问限制在特定的代码区间里面,PAN可以阻止kernel直接访问用户,它要求访问之前,必须在硬件上开启访问权限;
根据 https://patchwork.kernel.org/patch/6808781/ 如果系统支持PAN且开启了PAN
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 318175f62c24..c53a4b1d5968 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -597,6 +597,20 @@ config FORCE_MAX_ZONEORDER
default "14" if (ARM64_64K_PAGES && TRANSPARENT_HUGEPAGE)
default "11"
+config ARM64_PAN
+ bool "Enable support for Privileged Access Never (PAN)"
+ default y
+ help
+ Privileged Access Never (PAN; part of the ARMv8.1 Extensions)
+ prevents the kernel or hypervisor from accessing user-space (EL0)
+ memory directly.
+
+ Choosing this option will cause any unprotected (not using
+ copy_to_user et al) memory access to fail with a permission fault.
+
+ The feature is detected at runtime, and will remain as a 'nop'
+ instruction if the cpu does not implement the feature.
+
menuconfig ARMV8_DEPRECATED
bool "Emulate deprecated/obsolete ARMv8 instructions"
depends on COMPAT
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 680a6a1f087e..27ff67d99af5 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -25,8 +25,9 @@
#define ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE 1
#define ARM64_WORKAROUND_845719 2
#define ARM64_HAS_SYSREG_GIC_CPUIF 3
+#define ARM64_HAS_PAN 4
-#define ARM64_NCAPS 4
+#define ARM64_NCAPS 5
#ifndef __ASSEMBLY__
内核访问用户空间buf,需要修改PSATE寄存器开启访问权限,完事后应该再次修改PSTATE,关闭内核对用户的访问权限
diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S
index 5e27add9d362..882c1544a73e 100644
--- a/arch/arm64/lib/copy_from_user.S
+++ b/arch/arm64/lib/copy_from_user.S
@@ -15,7 +15,11 @@
*/
#include <linux/linkage.h>
+
+#include <asm/alternative.h>
#include <asm/assembler.h>
+#include <asm/cpufeature.h>
+#include <asm/sysreg.h>
/*
* Copy from user space to a kernel buffer (alignment handled by the hardware)
@@ -28,6 +32,8 @@
* x0 - bytes not copied
*/
ENTRY(__copy_from_user)
+ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \
+ CONFIG_ARM64_PAN)
add x4, x1, x2 // upper user buffer boundary
subs x2, x2, #8
b.mi 2f
@@ -51,6 +57,8 @@ USER(9f, ldrh w3, [x1], #2 )
USER(9f, ldrb w3, [x1] )
strb w3, [x0]
5: mov x0, #0
+ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \
+ CONFIG_ARM64_PAN)
ret
ENDPROC(__copy_from_user)
CONFIG_ARM64_SW_TTBR0_PAN
CONFIG_ARM64_PAN是硬件实现功能(ARMv8.1扩展功能);如果硬件不支持,可以使用CONFIG_ARM64_SW_TTBR0_PAN来软件仿真实现这种功能;
由于ARM64的硬件特殊设计,我们使用两个页表基地址寄存器ttbr0_el1和ttbr1_el1。处理器根据64 bit地址的高16 bit判断访问的地址属于用户空间还是内核空间。如果是用户空间地址则使用ttbr0_el1,反之使用ttbr1_el1。因此,ARM64进程切换的时候,只需要改变ttbr0_el1的值即可。ttbr1_el1可以选择不需要改变,因为所有的进程共享相同的内核空间地址。
ENTRY(__arch_copy_from_user)
uaccess_enable_not_uao x3, x4, x5
add end, x0, x2
#include "copy_template.S" //copy
uaccess_disable_not_uao x3, x4
mov x0, #0 // Nothing to copy
ret
ENDPROC(__arch_copy_from_user)
static inline void uaccess_enable_not_uao(void)
{
__uaccess_enable(ARM64_ALT_PAN_NOT_UAO);
}
#define __uaccess_enable(alt) \
do { \
if (!uaccess_ttbr0_enable()) \
asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), alt, \
CONFIG_ARM64_PAN)); \
} while (0)
定义了CONFIG_ARM64_SW_TTBR0_PAN,就通过操作write_sysreg(ttbr0, ttbr0_el1)来使能或者失能软件仿真PAN的功能;
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
static inline void __uaccess_ttbr0_disable(void)
{
unsigned long flags, ttbr;
local_irq_save(flags);
ttbr = read_sysreg(ttbr1_el1);
ttbr &= ~TTBR_ASID_MASK;
/* reserved_ttbr0 placed at the end of swapper_pg_dir */
write_sysreg(ttbr + SWAPPER_DIR_SIZE, ttbr0_el1);
isb();
/* Set reserved ASID */
write_sysreg(ttbr, ttbr1_el1);
isb();
local_irq_restore(flags);
}
static inline void __uaccess_ttbr0_enable(void)
{
unsigned long flags, ttbr0, ttbr1;
/*
* Disable interrupts to avoid preemption between reading the 'ttbr0'
* variable and the MSR. A context switch could trigger an ASID
* roll-over and an update of 'ttbr0'.
*/
local_irq_save(flags);
ttbr0 = READ_ONCE(current_thread_info()->ttbr0);
/* Restore active ASID */
ttbr1 = read_sysreg(ttbr1_el1);
ttbr1 &= ~TTBR_ASID_MASK; /* safety measure */
ttbr1 |= ttbr0 & TTBR_ASID_MASK;
write_sysreg(ttbr1, ttbr1_el1);
isb();
/* Restore user page table */
write_sysreg(ttbr0, ttbr0_el1);
isb();
local_irq_restore(flags);
}
static inline bool uaccess_ttbr0_disable(void)
{
if (!system_uses_ttbr0_pan())
return false;
__uaccess_ttbr0_disable();
return true;
}
static inline bool uaccess_ttbr0_enable(void)
{
if (!system_uses_ttbr0_pan())
return false;
__uaccess_ttbr0_enable();
return true;
}
#else
static inline bool uaccess_ttbr0_disable(void)
{
return false;
}
static inline bool uaccess_ttbr0_enable(void)
{
return false;
}
#endif
没定义就通过SET_PSTATE_PAN设置PSTATE寄存器来开始或关闭PAN
static inline void uaccess_enable_not_uao(void)
{
__uaccess_enable(ARM64_ALT_PAN_NOT_UAO);
}
#define __uaccess_enable(alt) \
do { \
if (!uaccess_ttbr0_enable()) \
asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), alt, \
CONFIG_ARM64_PAN)); \
} while (0)
如何才能避免内核态访问用户态地址空间呢?
改变ttbr0_el1的值即可,指向一段非法的映射即可:为此准备了一份特殊的页表,该页表大小4k内存,其值全是0。当进程切换到内核态后,修改ttbr0_el1的值为该页表的地址即可保证访问用户空间地址是非法访问。这个特殊的页表内存通过链接脚本分配
#define RESERVED_TTBR0_SIZE (PAGE_SIZE)
SECTIONS
{
reserved_ttbr0 = .;
. += RESERVED_TTBR0_SIZE;
swapper_pg_dir = .;
. += SWAPPER_DIR_SIZE;
swapper_pg_end = .;
}
五.总结
(1)无论是内核态还是用户态访问合法的用户空间地址,当虚拟地址并未建立物理地址的映射关系的时候,page fault的流程几乎一样,都会帮助我们申请物理内存并创建映射关系。所以这种情况下memcpy()和copy_{to,from}_user()是类似的
(2)当内核态访问非法用户空间地址的时候,通过.fixup和__ex_table两个段的帮助尝试修复异常。这种修复异常并不是建立地址映射关系,而是修改do_page_fault()返回地址。memcpy()由于没有创建这样的段,所以memcpy()无法做到这点
(3)如果确保用户态buf合法有效,可以用memcpy()函数替代copy_{to,from}_user()。经过一些试验测试,发现使用memcpy(),程序的运行上并没有问题。但建议用copy_{to,from}_user() ,或者{get,put}_user()等有安全性检查和异常处理的接口
(4)在使能CONFIG_ARM64_SW_TTBR0_PAN或者CONFIG_ARM64_PAN(硬件支持的情况下才有效)的时候,我们只能使用copy_{to,from}_user()这种接口(函数实现会关闭和打开PAN的限制),直接使用memcpy()是被禁止的
(5)在使能CONFIG_ARM64_SW_TTBR0_PAN或者CONFIG_ARM64_PAN;在memcpy()调用之前通过uaccess_enable_not_uao()允许内核态访问用户空间地址,调用memcpy(),最后通过uaccess_disable_not_uao()关闭内核态访问用户空间的能力;就可以使用memcpy了