QEMU MIPSr4k硬件扩展
Published: 7/1/2022
因为MIPS MALTA本身不在TLB或者页表中提供PTE_A等标志位,所以我只能修改QEMU来提供这样的机制。
至于为什么当时MIPS不提供这样的机制,我想可能是设计目标不同:在功耗、效率与成本之间做权衡(tradeoff)时,由硬件频繁地修改页表项似乎会带来较大的开销。
代码查看
qemu中提供的对应架构的代码主要位于target/<arch>
中,涉及到特定架构的TLB与TCG等内容则放在了target/<arch>/tcg
与target/<arch>/tcg/system
中。
以mips为例,涉及到TLB的文件主要有:
<target/mips/internal.h>
:存放MIPS相关结构体与函数声明,包括TLB表项结构体,这里以r4k为例:
/*
 * One entry of the r4k-style TLB.
 *
 * Members suffixed 0 / 1 are the per-half attributes of the entry: the
 * r4k_helper_tlbr code packs the "0" members into CP0_EntryLo0 and the "1"
 * members into CP0_EntryLo1, while G is copied into both registers.
 * NOTE(review): the even/odd-page meaning of the two halves is assumed from
 * the naming, confirm against the MIPS architecture manual.
 */
typedef struct r4k_tlb_t r4k_tlb_t;
struct r4k_tlb_t {
/* Tag part of the entry (presumably virtual page number, page mask,
 * address-space id and memory-map id; verify against the lookup code). */
target_ulong VPN;
uint32_t PageMask;
uint16_t ASID;
uint32_t MMID;
/* G: single bit shared by both halves (placed at bit 0 of both EntryLo). */
unsigned int G:1;
/* C0/C1: 3-bit attribute, placed at EntryLo bits 5:3 on read-back. */
unsigned int C0:3;
unsigned int C1:3;
/* V0/V1: placed at EntryLo bit 1 on read-back. */
unsigned int V0:1;
unsigned int V1:1;
/* D0/D1: placed at EntryLo bit 2 on read-back. */
unsigned int D0:1;
unsigned int D1:1;
/*
 * A0/A1: "accessed" bits, one per half. These are an addition made for this
 * experiment so that the emulated TLB can report page accesses.
 */
unsigned int A0:1;
unsigned int A1:1;
/* XI/RI: placed at CP0EnLo_XI / CP0EnLo_RI on read-back. */
unsigned int XI0:1;
unsigned int XI1:1;
unsigned int RI0:1;
unsigned int RI1:1;
/* EHINV: presumably marks the entry as invalid (TODO confirm). */
unsigned int EHINV:1;
/* Physical frame information for half 0 and half 1. */
uint64_t PFN[2];
};
以下则是QEMU对TLB处理的抽象:
/*
 * Per-CPU TLB state: entry counts, the address-translation hook, one hook per
 * TLB helper operation, and the backing storage for the TLB entries.
 */
struct CPUMIPSTLBContext {
/* Entry counters (presumably total entries and entries currently in use). */
uint32_t nb_tlb;
uint32_t tlb_in_use;
/*
 * Translation hook: resolves `address` for the given access type and
 * reports the physical address and protection through the out-pointers.
 * For the r4k model this is r4k_map_address.
 */
int (*map_address)(CPUMIPSState *env, hwaddr *physical, int *prot,
target_ulong address, MMUAccessType access_type);
/*
 * Hooks for the individual TLB helper operations.
 * NOTE(review): names suggest the TLBWI/TLBWR/TLBP/TLBR/TLBINV/TLBINVF
 * instructions, confirm against the translator.
 */
void (*helper_tlbwi)(CPUMIPSState *env);
void (*helper_tlbwr)(CPUMIPSState *env);
void (*helper_tlbp)(CPUMIPSState *env);
void (*helper_tlbr)(CPUMIPSState *env);
void (*helper_tlbinv)(CPUMIPSState *env);
void (*helper_tlbinvf)(CPUMIPSState *env);
/* MMU-model specific storage; only the r4k variant is declared here. */
union {
struct {
/* Fixed-size array of MIPS_TLB_MAX r4k TLB entries. */
r4k_tlb_t tlb[MIPS_TLB_MAX];
} r4k;
} mmu;
};
从这里我们也可以发现QEMU提供了128(MIPS_TLB_MAX)项TLB。
target/mips/tcg/system/tlb_helper.<ch>
:这里存放具体处理TLB事项的代码,比如:
- r4k_map_address
(该函数对应上述抽象中的map_address,用于具体访问TLB并查看相应权限位,我在这里添加了访问位的相关处理)
- mips_cpu_tlb_fill
(MIPS MALTA通过该函数访问对应内存,该函数调用get_physical_address
来查看对应内存是否在QEMU本身提供设置的段中,并通过tlb_set_page
来“访问内存”,正是在该函数的调用链中,调用了上述的r4k_map_address
来检查并设置TLB,判断是否在TLB中,否则产生TLB异常)
- page_table_walk_refill
:该函数会查询QEMU自身设置的页表(并非宿主机页表)
注意内容
target/mips/cpu.c
中设置了相应的操作:
/*
 * TCG hook table for the MIPS CPU.
 *
 * The first four hooks are always present; the rest are only compiled in for
 * system emulation (!CONFIG_USER_ONLY). Note that .tlb_fill is set while
 * .tlb_fill_align is left unset (NULL), so generic code that tests
 * ops->tlb_fill_align falls back to the .tlb_fill path.
 */
static const TCGCPUOps mips_tcg_ops = {
.initialize = mips_tcg_init,
.translate_code = mips_translate_code,
.synchronize_from_tb = mips_cpu_synchronize_from_tb,
.restore_state_to_opc = mips_restore_state_to_opc,
#if !defined(CONFIG_USER_ONLY)
/* Softmmu TLB miss handler. */
.tlb_fill = mips_cpu_tlb_fill,
.cpu_exec_interrupt = mips_cpu_exec_interrupt,
.cpu_exec_halt = mips_cpu_has_work,
.do_interrupt = mips_cpu_do_interrupt,
.do_transaction_failed = mips_cpu_do_transaction_failed,
/* Called by the generic code for misaligned guest accesses. */
.do_unaligned_access = mips_cpu_do_unaligned_access,
.io_recompile_replay_branch = mips_io_recompile_replay_branch,
#endif /* !CONFIG_USER_ONLY */
};
由于该结构体并没有设置tlb_fill_align成员函数,根据下列代码我们可以发现会走else分支,调用tlb_fill(即mips_cpu_tlb_fill),并如之前所说最终通过tlb_set_page完成填充:
// accel/tcg/cputlb.c
/*
 * Resolve a softmmu TLB miss for @addr through the target's TCG hooks.
 *
 * Targets that provide ->tlb_fill_align get a single call that handles both
 * alignment and paging; on success the returned entry is installed with
 * tlb_set_page_full(). Targets without it use the legacy sequence: the
 * alignment check (->do_unaligned_access) comes first, then ->tlb_fill.
 *
 * Returns true when the hook reported success. A false return is only
 * permitted for probing accesses, which the assert enforces.
 */
static bool tlb_fill_align(CPUState *cpu, vaddr addr, MMUAccessType type,
                           int mmu_idx, MemOp memop, int size,
                           bool probe, uintptr_t ra)
{
    const TCGCPUOps *ops = cpu->cc->tcg_ops;

    if (!ops->tlb_fill_align) {
        /* Legacy behaviour is alignment before paging. */
        bool misaligned =
            (addr & ((1u << memop_alignment_bits(memop)) - 1)) != 0;

        if (misaligned) {
            ops->do_unaligned_access(cpu, addr, type, mmu_idx, ra);
        }
        if (ops->tlb_fill(cpu, addr, size, type, mmu_idx, probe, ra)) {
            return true;
        }
    } else {
        CPUTLBEntryFull full;

        if (ops->tlb_fill_align(cpu, &full, addr, type, mmu_idx,
                                memop, size, probe, ra)) {
            /* The hook only describes the mapping; install it here. */
            tlb_set_page_full(cpu, mmu_idx, addr, &full);
            return true;
        }
    }

    /* Only a probe may fail without the hook raising an exception. */
    assert(probe);
    return false;
}
tcg
则是负责将目标架构机器码翻译成宿主机的机器码,同时处理不同架构的特性。
上述提到的tlb_fill_align
函数会被mmu_lookup
调用,mmu_lookup
正是负责翻译页面的函数,该函数进一步被do_ld<x>_mmu
函数调用(该函数具体地实现了访问内存):
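上述调用链为:do_ld<x>_mmu → mmu_lookup → tlb_fill_align → ops->tlb_fill(即mips_cpu_tlb_fill)→ get_physical_address → r4k_map_address。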
之后只需要修改位于target/mips/tcg/system/tlb_helper.c
中的r4k_helper_tlbr
,将新增的A0与A1位写入对应的EntryLo寄存器即可。
/*
 * Rebuild CP0_EntryLo0 from half 0 of the TLB entry.
 * Layout as assembled here: G at bit 0, V at bit 1, D at bit 2, C at
 * bits 5:3, RI/XI at CP0EnLo_RI/CP0EnLo_XI, plus the PFN field produced by
 * get_entrylo_pfn_from_tlb(). The added accessed bit A0 is OR-ed in at bit 5.
 *
 * NOTE(review): C0 is a 3-bit field shifted left by 3, so it already occupies
 * bit 5. A0 therefore shares a bit with the top bit of C0; a guest reading
 * EntryLo cannot tell "A set" from "C >= 4". Confirm the guest only uses
 * C values below 4, or move A to a bit that is otherwise unused.
 */
env->CP0_EntryLo0 = tlb->G | (tlb->V0 << 1) | (tlb->D0 << 2) |
((uint64_t)tlb->RI0 << CP0EnLo_RI) |
((uint64_t)tlb->XI0 << CP0EnLo_XI) | (tlb->C0 << 3) |
(tlb->A0 << 5) |
get_entrylo_pfn_from_tlb(tlb->PFN[0] >> 12);
/*
 * Same packing for half 1 into CP0_EntryLo1; G is shared with EntryLo0.
 * NOTE(review): A1 at bit 5 overlaps the top bit of the 3-bit C1 field in
 * the same way as for EntryLo0.
 */
env->CP0_EntryLo1 = tlb->G | (tlb->V1 << 1) | (tlb->D1 << 2) |
((uint64_t)tlb->RI1 << CP0EnLo_RI) |
((uint64_t)tlb->XI1 << CP0EnLo_XI) | (tlb->C1 << 3) |
(tlb->A1 << 5) |
get_entrylo_pfn_from_tlb(tlb->PFN[1] >> 12);