QEMU memory access path: guest virtual address (GVA) → guest physical address (GPA) → host virtual address (HVA). The GVA→HVA translation is done by QEMU itself, while HVA→HPA is handled by the host operating system. The software TLB entry is defined below. The addr_read/addr_write/addr_code fields hold the guest virtual page address for read, write and execute access respectively, so comparing against the right field is also a permission check; addend = hva_base − gva_base, i.e. adding it to a GVA yields the corresponding HVA.
typedef struct CPUTLBEntry {
    target_ulong addr_read;   /* guest virtual page address, valid for reads */
    target_ulong addr_write;  /* guest virtual page address, valid for writes */
    target_ulong addr_code;   /* guest virtual page address, valid for execution */
    unsigned long addend;     /* HVA - GVA: add to a guest virtual address to get the host address */
} CPUTLBEntry;
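Under these definitions, an access that hits the TLB can be served without any MMU walk. A minimal sketch of a read lookup (not the actual QEMU code: tlb_try_read is a made-up name, alignment handling is omitted, and the QEMU types and constants CPUArchState, target_ulong, TARGET_PAGE_BITS, TARGET_PAGE_MASK and CPU_TLB_SIZE are assumed to be in scope):

/* Hypothetical sketch of a softmmu TLB read hit. */
static inline int tlb_try_read(CPUArchState *env, int mmu_idx,
                               target_ulong gva, uint8_t *out)
{
    int index = (gva >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
    CPUTLBEntry *te = &env->tlb_table[mmu_idx][index];

    /* Hit: the page-aligned GVA matches the entry tagged as readable. */
    if (te->addr_read == (gva & TARGET_PAGE_MASK)) {
        *out = *(uint8_t *)(uintptr_t)(gva + te->addend); /* HVA = GVA + addend */
        return 1;
    }
    return 0; /* miss (or not readable): fall back to the MMU walk */
}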
1. get_page_addr_code first checks whether the TLB entry for the address hits. If it does not, it calls ldub_code, which takes the MMU translation path; otherwise the HVA can be computed directly from the entry.
/* NOTE: this function can trigger an exception */
/* NOTE2: the returned address is not exactly the physical address: it
   is the offset relative to phys_ram_base */
tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr)
{
    int mmu_idx, page_index, pd;
    void *p;
    MemoryRegion *mr;

    page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
    mmu_idx = cpu_mmu_index(env1);
    if (unlikely(env1->tlb_table[mmu_idx][page_index].addr_code !=
                 (addr & TARGET_PAGE_MASK))) {
#ifdef CONFIG_TCG_PASS_AREG0
        cpu_ldub_code(env1, addr);
#else
        ldub_code(addr);
#endif
    }
    pd = env1->iotlb[mmu_idx][page_index] & ~TARGET_PAGE_MASK;
    mr = iotlb_to_region(pd);
    if (memory_region_is_unassigned(mr)) {
#if defined(TARGET_ALPHA) || defined(TARGET_MIPS) || defined(TARGET_SPARC)
        cpu_unassigned_access(env1, addr, 0, 1, 0, 4);
#else
        cpu_abort(env1, "Trying to execute code outside RAM or ROM at 0x"
                  TARGET_FMT_lx "\n", addr);
#endif
    }
    p = (void *)((uintptr_t)addr + env1->tlb_table[mmu_idx][page_index].addend);
    return qemu_ram_addr_from_host_nofail(p);
}
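The final qemu_ram_addr_from_host_nofail step maps the host pointer back into the offset space of guest RAM (hence the NOTE2 comment above). Conceptually it does something like the following simplified sketch; the real function walks QEMU's global RAM block list, and RAMBlockSketch/ram_addr_from_host_sketch are stand-in names:

#include <stdint.h>
#include <stdlib.h>

typedef uintptr_t ram_addr_t;            /* stand-in for QEMU's ram_addr_t */

typedef struct RAMBlockSketch {
    uint8_t *host;                        /* HVA where this block is mapped */
    ram_addr_t offset;                    /* its offset inside guest RAM space */
    ram_addr_t length;
    struct RAMBlockSketch *next;
} RAMBlockSketch;

static ram_addr_t ram_addr_from_host_sketch(RAMBlockSketch *blocks, void *ptr)
{
    for (RAMBlockSketch *b = blocks; b; b = b->next) {
        uint8_t *p = ptr;
        if (p >= b->host && p < b->host + b->length) {
            return (ram_addr_t)(p - b->host) + b->offset;
        }
    }
    abort();  /* "nofail": a host pointer outside guest RAM is a fatal error */
}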
2. When the TLB misses, ldub_code is used. That function is generated by the macro template below, which first checks the TLB itself: on a hit it loads directly from the HVA, otherwise it falls through to the MMU helper (the macro expansion is illustrated after the listing).
static inline RES_TYPE
glue(glue(glue(CPU_PREFIX, ld), USUFFIX), MEMSUFFIX)(ENV_PARAM
                                                     target_ulong ptr)
{
    int page_index;
    RES_TYPE res;
    target_ulong addr;
    int mmu_idx;

    addr = ptr;
    page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
    mmu_idx = CPU_MMU_INDEX;
    if (unlikely(env->tlb_table[mmu_idx][page_index].ADDR_READ !=
                 (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
        res = glue(glue(glue(HELPER_PREFIX, ld), SUFFIX), MMUSUFFIX)(ENV_VAR
                                                                     addr,
                                                                     mmu_idx);
    } else {
        uintptr_t hostaddr = addr + env->tlb_table[mmu_idx][page_index].addend;
        res = glue(glue(ld, USUFFIX), _raw)(hostaddr);
    }
    return res;
}
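To make the template readable, note that glue is QEMU's token-pasting macro. For the code-fetch variant used by get_page_addr_code the names expand roughly as follows; the suffix values shown are illustrative assumptions about how the header is instantiated, not copied from a QEMU header:

#define xglue(x, y) x ## y
#define glue(x, y)  xglue(x, y)

/* Illustrative instantiation (assumed values):
 *   CPU_PREFIX = cpu_, USUFFIX = ub, MEMSUFFIX = _code,
 *   HELPER_PREFIX = helper_, SUFFIX = b, MMUSUFFIX = _cmmu
 *
 * glue(glue(glue(CPU_PREFIX, ld), USUFFIX), MEMSUFFIX)
 *   -> cpu_ldub_code           (the template's public entry point)
 * glue(glue(glue(HELPER_PREFIX, ld), SUFFIX), MMUSUFFIX)
 *   -> helper_ldb_cmmu         (the slow-path MMU helper it calls)
 */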
3. This helper queries the TLB again. On a hit it further distinguishes I/O accesses from RAM accesses; on a miss it asks the MMU for the physical address corresponding to the virtual address and fills the TLB.
DATA_TYPE
glue(glue(glue(HELPER_PREFIX, ld), SUFFIX), MMUSUFFIX)(ENV_PARAM
                                                       target_ulong addr,
                                                       int mmu_idx)
{
    DATA_TYPE res;
    int index;
    target_ulong tlb_addr;
    target_phys_addr_t ioaddr;
    uintptr_t retaddr;

    /* test if there is match for unaligned or IO access */
    /* XXX: could done more in memory macro in a non portable way */
    index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
 redo:
    tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ;
    if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
        if (tlb_addr & ~TARGET_PAGE_MASK) {
            /* IO access */
            if ((addr & (DATA_SIZE - 1)) != 0)
                goto do_unaligned_access;
            retaddr = GETPC();
            ioaddr = env->iotlb[mmu_idx][index];
            res = glue(io_read, SUFFIX)(ENV_VAR ioaddr, addr, retaddr);
        } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
            /* slow unaligned access (it spans two pages or IO) */
        do_unaligned_access:
            retaddr = GETPC();
#ifdef ALIGNED_ONLY
            do_unaligned_access(ENV_VAR addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
#endif
            res = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(ENV_VAR addr,
                                                         mmu_idx, retaddr);
        } else {
            /* unaligned/aligned access in the same page */
            uintptr_t addend;
#ifdef ALIGNED_ONLY
            if ((addr & (DATA_SIZE - 1)) != 0) {
                retaddr = GETPC();
                do_unaligned_access(ENV_VAR addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
            }
#endif
            addend = env->tlb_table[mmu_idx][index].addend;
            res = glue(glue(ld, USUFFIX), _raw)((uint8_t *)(intptr_t)
                                                (addr + addend));
        }
    } else {
        /* the page is not in the TLB : fill it */
        retaddr = GETPC();
#ifdef ALIGNED_ONLY
        if ((addr & (DATA_SIZE - 1)) != 0)
            do_unaligned_access(ENV_VAR addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
#endif
        tlb_fill(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
        goto redo;
    }
    return res;
}
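Two details of this helper are easy to miss. First, the hit test also masks tlb_addr with TLB_INVALID_MASK, so an entry marked invalid can never compare equal to a page-aligned address. Second, an access is considered slow when it crosses a page boundary: with 4 KiB pages (TARGET_PAGE_SIZE = 0x1000) a 4-byte load at page offset 0xffe satisfies (0xffe + 4 - 1) >= 0x1000 and takes the slow_ld path. A small standalone check of that arithmetic (plain C, the page-size constants are local assumptions, not QEMU headers):

#include <assert.h>
#include <stdint.h>

#define TARGET_PAGE_SIZE 0x1000           /* assumed 4 KiB target pages */
#define TARGET_PAGE_MASK (~(TARGET_PAGE_SIZE - 1))

/* Returns 1 when a data_size-byte access at addr spans two pages. */
static int spans_two_pages(uint32_t addr, int data_size)
{
    return ((addr & ~TARGET_PAGE_MASK) + data_size - 1) >= TARGET_PAGE_SIZE;
}

int main(void)
{
    assert(!spans_two_pages(0x2000ffc, 4));  /* last byte 0x2000fff: same page */
    assert(spans_two_pages(0x2000ffe, 4));   /* last byte 0x2001001: slow path */
    return 0;
}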
4. Taking ARM as an example, tlb_fill handles the virtual-to-physical translation through cpu_arm_handle_mmu_fault. If the virtual address has no corresponding physical address, or the access permissions are insufficient, the CPU raises a page-fault exception.
/* try to fill the TLB and return an exception if error. If retaddr is
   NULL, it means that the function was called in C code (i.e. not
   from generated code or from helper.c) */
/* XXX: fix it to restore all registers */
void tlb_fill(CPUARMState *env1, target_ulong addr, int is_write, int mmu_idx,
              uintptr_t retaddr)
{
    TranslationBlock *tb;
    CPUARMState *saved_env;
    int ret;

    saved_env = env;
    env = env1;
    ret = cpu_arm_handle_mmu_fault(env, addr, is_write, mmu_idx);
    if (unlikely(ret)) {
        if (retaddr) {
            /* now we have a real cpu fault */
            tb = tb_find_pc(retaddr);
            if (tb) {
                /* the PC is inside the translated code. It means that we have
                   a virtual CPU fault */
                cpu_restore_state(tb, env, retaddr);
            }
        }
        raise_exception(env->exception_index);
    }
    env = saved_env;
}
5. cpu_arm_handle_mmu_fault mainly performs the page walk, checking whether a corresponding physical address exists and whether the permissions allow the access. If so, it updates the TLB; otherwise it records the fault information.
int cpu_arm_handle_mmu_fault (CPUARMState *env, target_ulong address,
                              int access_type, int mmu_idx)
{
    uint32_t phys_addr;
    target_ulong page_size;
    int prot;
    int ret, is_user;

    is_user = mmu_idx == MMU_USER_IDX;
    ret = get_phys_addr(env, address, access_type, is_user, &phys_addr, &prot,
                        &page_size);
    if (ret == 0) {
        /* Map a single [sub]page. */
        phys_addr &= ~(uint32_t)0x3ff;
        address &= ~(uint32_t)0x3ff;
        tlb_set_page (env, address, phys_addr, prot, mmu_idx, page_size);
        return 0;
    }

    if (access_type == 2) {
        env->cp15.c5_insn = ret;
        env->cp15.c6_insn = address;
        env->exception_index = EXCP_PREFETCH_ABORT;
    } else {
        env->cp15.c5_data = ret;
        if (access_type == 1 && arm_feature(env, ARM_FEATURE_V6))
            env->cp15.c5_data |= (1 << 11);
        env->cp15.c6_data = address;
        env->exception_index = EXCP_DATA_ABORT;
    }
    return 1;
}
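On the success path, tlb_set_page is what actually refills the CPUTLBEntry shown at the top. Conceptually it does something like the following simplified sketch; the real implementation also handles I/O pages, dirty-page tracking and TLB flags, and get_host_addr_for_phys is a made-up placeholder for the GPA→HVA lookup:

/* Simplified sketch of a TLB refill (hypothetical helper, RAM pages only). */
static void tlb_set_page_sketch(CPUArchState *env, target_ulong vaddr,
                                target_phys_addr_t paddr, int prot,
                                int mmu_idx, target_ulong size)
{
    int index = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
    CPUTLBEntry *te = &env->tlb_table[mmu_idx][index];
    /* get_host_addr_for_phys() stands in for the real GPA -> HVA mapping. */
    uintptr_t hva = get_host_addr_for_phys(paddr);

    te->addend     = hva - (uintptr_t)vaddr;   /* so that HVA = GVA + addend */
    te->addr_read  = (prot & PAGE_READ)  ? (vaddr & TARGET_PAGE_MASK) : -1;
    te->addr_write = (prot & PAGE_WRITE) ? (vaddr & TARGET_PAGE_MASK) : -1;
    te->addr_code  = (prot & PAGE_EXEC)  ? (vaddr & TARGET_PAGE_MASK) : -1;
}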
QEMU's memory-access emulation: QEMU does not expand the full address translation into IR, since that would both make translation harder and consume a large amount of code_buffer; instead it calls out to helper functions. When translating target code, a memory access generates intermediate code of the following form:
tmp = gen_ld16s(addr, IS_USER(s));
static inline TCGv gen_ld16s(TCGv addr, int index)
{
    TCGv tmp = tcg_temp_new_i32();
    tcg_gen_qemu_ld16s(tmp, addr, index);
    return tmp;
}
Next, when TCG translates the intermediate code into host machine code, it first emits an inline TLB lookup; on a hit the result is returned directly, otherwise one of the functions in qemu_ld_helpers/qemu_st_helpers below is called to perform the target virtual-to-physical translation.
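The ARM host code emitted by tcg_out_qemu_ld below is easier to follow with the C-level logic it encodes in mind. Roughly, for a 32-bit load, it behaves like the sketch below (a description of the generated code's behaviour, not code that exists in QEMU; emitted_qemu_ld32 is a made-up name, alignment checks and the 64-bit guest-address case are omitted, and the QEMU types are assumed from the listings above):

/* What the emitted host code does for a 32-bit guest load, in C terms. */
static uint32_t emitted_qemu_ld32(CPUArchState *env, target_ulong addr, int mem_index)
{
    int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
    CPUTLBEntry *te = &env->tlb_table[mem_index][index];

    if (te->addr_read == (addr & TARGET_PAGE_MASK)) {
        /* fast path, fully inlined in the translation block */
        return *(uint32_t *)(uintptr_t)(addr + te->addend);
    }
    /* slow path: branch out to the helper, which may fill the TLB
       and can raise a guest fault */
    return helper_ldl_mmu(env, addr, mem_index);
}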
#ifdef CONFIG_SOFTMMU

#include "../../softmmu_defs.h"

#ifdef CONFIG_TCG_PASS_AREG0
/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
   int mmu_idx) */
static const void * const qemu_ld_helpers[4] = {
    helper_ldb_mmu,
    helper_ldw_mmu,
    helper_ldl_mmu,
    helper_ldq_mmu,
};

/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
   uintxx_t val, int mmu_idx) */
static const void * const qemu_st_helpers[4] = {
    helper_stb_mmu,
    helper_stw_mmu,
    helper_stl_mmu,
    helper_stq_mmu,
};
#else
/* legacy helper signature: __ld_mmu(target_ulong addr, int
   mmu_idx) */
static void *qemu_ld_helpers[4] = {
    __ldb_mmu,
    __ldw_mmu,
    __ldl_mmu,
    __ldq_mmu,
};

/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
   int mmu_idx) */
static void *qemu_st_helpers[4] = {
    __stb_mmu,
    __stw_mmu,
    __stl_mmu,
    __stq_mmu,
};
#endif
#endif

#define TLB_SHIFT (CPU_TLB_ENTRY_BITS + CPU_TLB_BITS)

static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
{
    int addr_reg, data_reg, data_reg2, bswap;
#ifdef CONFIG_SOFTMMU
    int mem_index, s_bits;
# if TARGET_LONG_BITS == 64
    int addr_reg2;
# endif
    uint32_t *label_ptr;
#endif

#ifdef TARGET_WORDS_BIGENDIAN
    bswap = 1;
#else
    bswap = 0;
#endif
    data_reg = *args++;
    if (opc == 3)
        data_reg2 = *args++;
    else
        data_reg2 = 0; /* suppress warning */
    addr_reg = *args++;
#ifdef CONFIG_SOFTMMU
# if TARGET_LONG_BITS == 64
    addr_reg2 = *args++;
# endif
    mem_index = *args;
    s_bits = opc & 3;

    /* Should generate something like the following:
     *  shr r8, addr_reg, #TARGET_PAGE_BITS
     *  and r0, r8, #(CPU_TLB_SIZE - 1)   @ Assumption: CPU_TLB_BITS <= 8
     *  add r0, env, r0 lsl #CPU_TLB_ENTRY_BITS
     */
# if CPU_TLB_BITS > 8
# error
# endif
    tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_R8,
                    0, addr_reg, SHIFT_IMM_LSR(TARGET_PAGE_BITS));
    tcg_out_dat_imm(s, COND_AL, ARITH_AND,
                    TCG_REG_R0, TCG_REG_R8, CPU_TLB_SIZE - 1);
    tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R0, TCG_AREG0,
                    TCG_REG_R0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS));
    /* In the
     *  ldr r1 [r0, #(offsetof(CPUArchState, tlb_table[mem_index][0].addr_read))]
     * below, the offset is likely to exceed 12 bits if mem_index != 0 and
     * not exceed otherwise, so use an
     *  add r0, r0, #(mem_index * sizeof *CPUArchState.tlb_table)
     * before.
     */
    if (mem_index)
        tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R0, TCG_REG_R0,
                        (mem_index << (TLB_SHIFT & 1)) |
                        ((16 - (TLB_SHIFT >> 1)) << 8));
    tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R0,
                    offsetof(CPUArchState, tlb_table[0][0].addr_read));
    tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R1,
                    TCG_REG_R8, SHIFT_IMM_LSL(TARGET_PAGE_BITS));
    /* Check alignment. */
    if (s_bits)
        tcg_out_dat_imm(s, COND_EQ, ARITH_TST,
                        0, addr_reg, (1 << s_bits) - 1);
# if TARGET_LONG_BITS == 64
    /* XXX: possibly we could use a block data load or writeback in
     * the first access. */
    tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0,
                    offsetof(CPUArchState, tlb_table[0][0].addr_read) + 4);
    tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0,
                    TCG_REG_R1, addr_reg2, SHIFT_IMM_LSL(0));
# endif
    tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0,
                    offsetof(CPUArchState, tlb_table[0][0].addend));

    switch (opc) {
    case 0:
        tcg_out_ld8_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
        break;
    case 0 | 4:
        tcg_out_ld8s_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
        break;
    case 1:
        tcg_out_ld16u_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
        if (bswap) {
            tcg_out_bswap16(s, COND_EQ, data_reg, data_reg);
        }
        break;
    case 1 | 4:
        if (bswap) {
            tcg_out_ld16u_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
            tcg_out_bswap16s(s, COND_EQ, data_reg, data_reg);
        } else {
            tcg_out_ld16s_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
        }
        break;
    case 2:
    default:
        tcg_out_ld32_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
        if (bswap) {
            tcg_out_bswap32(s, COND_EQ, data_reg, data_reg);
        }
        break;
    case 3:
        if (bswap) {
            tcg_out_ld32_rwb(s, COND_EQ, data_reg2, TCG_REG_R1, addr_reg);
            tcg_out_ld32_12(s, COND_EQ, data_reg, TCG_REG_R1, 4);
            tcg_out_bswap32(s, COND_EQ, data_reg2, data_reg2);
            tcg_out_bswap32(s, COND_EQ, data_reg, data_reg);
        } else {
            tcg_out_ld32_rwb(s, COND_EQ, data_reg, TCG_REG_R1, addr_reg);
            tcg_out_ld32_12(s, COND_EQ, data_reg2, TCG_REG_R1, 4);
        }
        break;
    }

    label_ptr = (void *) s->code_ptr;
    tcg_out_b_noaddr(s, COND_EQ);

    /* TODO: move this code to where the constants pool will be */
    if (addr_reg != TCG_REG_R0) {
        tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
                        TCG_REG_R0, 0, addr_reg, SHIFT_IMM_LSL(0));
    }
# if TARGET_LONG_BITS == 32
    tcg_out_dat_imm(s, COND_AL, ARITH_MOV, TCG_REG_R1, 0, mem_index);
# else
    tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
                    TCG_REG_R1, 0, addr_reg2, SHIFT_IMM_LSL(0));
    tcg_out_dat_imm(s, COND_AL, ARITH_MOV, TCG_REG_R2, 0, mem_index);
# endif
#ifdef CONFIG_TCG_PASS_AREG0
    /* XXX/FIXME: suboptimal and incorrect for 64 bit */
    tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
                    tcg_target_call_iarg_regs[2], 0,
                    tcg_target_call_iarg_regs[1], SHIFT_IMM_LSL(0));
    tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
                    tcg_target_call_iarg_regs[1], 0,
                    tcg_target_call_iarg_regs[0], SHIFT_IMM_LSL(0));

    tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
                    tcg_target_call_iarg_regs[0], 0, TCG_AREG0,
                    SHIFT_IMM_LSL(0));
#endif
    tcg_out_call(s, (tcg_target_long) qemu_ld_helpers[s_bits]);

    switch (opc) {
    case 0 | 4:
        tcg_out_ext8s(s, COND_AL, data_reg, TCG_REG_R0);
        break;
    case 1 | 4:
        tcg_out_ext16s(s, COND_AL, data_reg, TCG_REG_R0);
        break;
    case 0:
    case 1:
    case 2:
    default:
        if (data_reg != TCG_REG_R0) {
            tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
                            data_reg, 0, TCG_REG_R0, SHIFT_IMM_LSL(0));
        }
        break;
    case 3:
        if (data_reg != TCG_REG_R0) {
            tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
                            data_reg, 0, TCG_REG_R0, SHIFT_IMM_LSL(0));
        }
        if (data_reg2 != TCG_REG_R1) {
            tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
                            data_reg2, 0, TCG_REG_R1, SHIFT_IMM_LSL(0));
        }
        break;
    }

    reloc_pc24(label_ptr, (tcg_target_long)s->code_ptr);
#else /* !CONFIG_SOFTMMU */
    if (GUEST_BASE) {
        uint32_t offset = GUEST_BASE;
        int i;
        int rot;

        while (offset) {
            i = ctz32(offset) & ~1;
            rot = ((32 - i) << 7) & 0xf00;

            tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R8, addr_reg,
                            ((offset >> i) & 0xff) | rot);
            addr_reg = TCG_REG_R8;
            offset &= ~(0xff << i);
        }
    }
    switch (opc) {
    case 0:
        tcg_out_ld8_12(s, COND_AL, data_reg, addr_reg, 0);
        break;
    case 0 | 4:
        tcg_out_ld8s_8(s, COND_AL, data_reg, addr_reg, 0);
        break;
    case 1:
        tcg_out_ld16u_8(s, COND_AL, data_reg, addr_reg, 0);
        if (bswap) {
            tcg_out_bswap16(s, COND_AL, data_reg, data_reg);
        }
        break;
    case 1 | 4:
        if (bswap) {
            tcg_out_ld16u_8(s, COND_AL, data_reg, addr_reg, 0);
            tcg_out_bswap16s(s, COND_AL, data_reg, data_reg);
        } else {
            tcg_out_ld16s_8(s, COND_AL, data_reg, addr_reg, 0);
        }
        break;
    case 2:
    default:
        tcg_out_ld32_12(s, COND_AL, data_reg, addr_reg, 0);
        if (bswap) {
            tcg_out_bswap32(s, COND_AL, data_reg, data_reg);
        }
        break;
    case 3:
        /* TODO: use block load -
         * check that data_reg2 > data_reg or the other way */
        if (data_reg == addr_reg) {
            tcg_out_ld32_12(s, COND_AL, data_reg2, addr_reg, bswap ? 0 : 4);
            tcg_out_ld32_12(s, COND_AL, data_reg, addr_reg, bswap ? 4 : 0);
        } else {
            tcg_out_ld32_12(s, COND_AL, data_reg, addr_reg, bswap ? 4 : 0);
            tcg_out_ld32_12(s, COND_AL, data_reg2, addr_reg, bswap ? 0 : 4);
        }
        if (bswap) {
            tcg_out_bswap32(s, COND_AL, data_reg, data_reg);
            tcg_out_bswap32(s, COND_AL, data_reg2, data_reg2);
        }
        break;
    }
#endif
}
In the memory access path above, the TLB lookup itself is translated directly into host instructions, while the target's MMU translation is performed by calling out to helper functions. This split is what keeps the common TLB-hit case inline and fast; only misses pay the cost of a function call.